audio_processing/aec: Refactors NonLinearProcessing to prepare for NEON optimizations

Puts functionality necessary to calculate sub-band coherences into a function.

BUG=3131
TESTED=trybots
R=kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/16789004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6570 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index 207c6dc..2fd298c 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -415,11 +415,167 @@
   }
 }
 
+static int PartitionDelay(const AecCore* aec) {
+  // Measures the energy in each filter partition and returns the partition with
+  // highest energy.
+  // TODO(bjornv): Spread computational cost by computing one partition per
+  // block?
+  float wfEnMax = 0;
+  int i;
+  int delay = 0;
+
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int pos = i * PART_LEN1;
+    float wfEn = 0;
+    for (j = 0; j < PART_LEN1; j++) {
+      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+          aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+    }
+
+    if (wfEn > wfEnMax) {
+      wfEnMax = wfEn;
+      delay = i;
+    }
+  }
+  return delay;
+}
+
+// Threshold to protect against the ill-effects of a zero far-end.
+static const float kMinFarendPSD = 15;
+
+// Updates the following smoothed  Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, also the filter diverge state is determined
+// upon actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+                             ? kExtendedSmoothingCoefficients[aec->mult - 1]
+                             : kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+    aec->se[i] = ptrGCoh[0] * aec->se[i] +
+                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+    // We threshold here to protect against the ill-effects of a zero farend.
+    // The threshold is not arbitrarily chosen, but balances protection and
+    // adverse interaction with the algorithm's tuning.
+    // TODO(bjornv): investigate further why this is so sensitive.
+    aec->sx[i] =
+        ptrGCoh[0] * aec->sx[i] +
+        ptrGCoh[1] * WEBRTC_SPL_MAX(
+            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], kMinFarendPSD);
+
+    aec->sde[i][0] =
+        ptrGCoh[0] * aec->sde[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+    aec->sde[i][1] =
+        ptrGCoh[0] * aec->sde[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+    aec->sxd[i][0] =
+        ptrGCoh[0] * aec->sxd[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+    aec->sxd[i][1] =
+        ptrGCoh[0] * aec->sxd[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+    sdSum += aec->sd[i];
+    seSum += aec->se[i];
+  }
+
+  // Divergent filter safeguard.
+  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+  if (aec->divergeState)
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+  // Reset if error is significantly larger than nearend (13 dB).
+  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+// Window time domain data to be used by the fft.
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int i;
+  for (i = 0; i < PART_LEN; i++) {
+    x_windowed[i] = x[i] * sqrtHanning[i];
+    x_windowed[PART_LEN + i] = x[PART_LEN + i] * sqrtHanning[PART_LEN - i];
+  }
+}
+
+// Puts fft output data into a complex valued array.
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int i;
+  data_complex[0][0] = data[0];
+  data_complex[1][0] = 0;
+  for (i = 1; i < PART_LEN; i++) {
+    data_complex[0][i] = data[2 * i];
+    data_complex[1][i] = data[2 * i + 1];
+  }
+  data_complex[0][PART_LEN] = data[1];
+  data_complex[1][PART_LEN] = 0;
+}
+
+static void SubbandCoherence(AecCore* aec,
+                             float efw[2][PART_LEN1],
+                             float xfw[2][PART_LEN1],
+                             float* fft,
+                             float* cohde,
+                             float* cohxd) {
+  float dfw[2][PART_LEN1];
+  int i;
+
+  if (aec->delayEstCtr == 0)
+    aec->delayIdx = PartitionDelay(aec);
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  // Windowed near fft
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed error fft
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
+
+  SmoothedPSD(aec, efw, dfw, xfw);
+
+  // Subband coherence
+  for (i = 0; i < PART_LEN1; i++) {
+    cohde[i] =
+        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+        (aec->sd[i] * aec->se[i] + 1e-10f);
+    cohxd[i] =
+        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+        (aec->sx[i] * aec->sd[i] + 1e-10f);
+  }
+}
+
 WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
 WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
 WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
 WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
 WebRtcAec_ComfortNoise_t WebRtcAec_ComfortNoise;
+WebRtcAec_SubbandCoherence_t WebRtcAec_SubbandCoherence;
 
 int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
   int i;
@@ -571,6 +727,7 @@
   WebRtcAec_FilterAdaptation = FilterAdaptation;
   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
   WebRtcAec_ComfortNoise = ComfortNoise;
+  WebRtcAec_SubbandCoherence = SubbandCoherence;
 
 #if defined(WEBRTC_ARCH_X86_FAMILY)
   if (WebRtc_GetCPUInfo(kSSE2)) {
@@ -1024,12 +1181,12 @@
 }
 
 static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) {
-  float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
+  float efw[2][PART_LEN1], xfw[2][PART_LEN1];
   complex_t comfortNoiseHband[PART_LEN1];
   float fft[PART_LEN2];
   float scale, dtmp;
   float nlpGainHband;
-  int i, j, pos;
+  int i;
 
   // Coherence and non-linear filter
   float cohde[PART_LEN1], cohxd[PART_LEN1];
@@ -1040,20 +1197,12 @@
   const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
   const int prefBandSize = kPrefBandSize / aec->mult;
   const int minPrefBand = 4 / aec->mult;
-
-  // Near and error power sums
-  float sdSum = 0, seSum = 0;
-
   // Power estimate smoothing coefficients.
-  const float* ptrGCoh = aec->extended_filter_enabled
-                             ? kExtendedSmoothingCoefficients[aec->mult - 1]
-                             : kNormalSmoothingCoefficients[aec->mult - 1];
   const float* min_overdrive = aec->extended_filter_enabled
                                    ? kExtendedMinOverDrive
                                    : kNormalMinOverDrive;
 
   // Filter energy
-  float wfEnMax = 0, wfEn = 0;
   const int delayEstInterval = 10 * aec->mult;
 
   float* xfw_ptr = NULL;
@@ -1068,26 +1217,6 @@
   nlpGainHband = (float)0.0;
   dtmp = (float)0.0;
 
-  // Measure energy in each filter partition to determine delay.
-  // TODO: Spread by computing one partition per block?
-  if (aec->delayEstCtr == 0) {
-    wfEnMax = 0;
-    aec->delayIdx = 0;
-    for (i = 0; i < aec->num_partitions; i++) {
-      pos = i * PART_LEN1;
-      wfEn = 0;
-      for (j = 0; j < PART_LEN1; j++) {
-        wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
-                aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
-      }
-
-      if (wfEn > wfEnMax) {
-        wfEnMax = wfEn;
-        aec->delayIdx = i;
-      }
-    }
-  }
-
   // We should always have at least one element stored in |far_buf|.
   assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
   // NLP
@@ -1098,104 +1227,7 @@
   // Buffer far.
   memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
 
-  // Use delayed far.
-  memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));
-
-  // Windowed near fft
-  for (i = 0; i < PART_LEN; i++) {
-    fft[i] = aec->dBuf[i] * sqrtHanning[i];
-    fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
-  }
-  aec_rdft_forward_128(fft);
-
-  dfw[1][0] = 0;
-  dfw[1][PART_LEN] = 0;
-  dfw[0][0] = fft[0];
-  dfw[0][PART_LEN] = fft[1];
-  for (i = 1; i < PART_LEN; i++) {
-    dfw[0][i] = fft[2 * i];
-    dfw[1][i] = fft[2 * i + 1];
-  }
-
-  // Windowed error fft
-  for (i = 0; i < PART_LEN; i++) {
-    fft[i] = aec->eBuf[i] * sqrtHanning[i];
-    fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
-  }
-  aec_rdft_forward_128(fft);
-  efw[1][0] = 0;
-  efw[1][PART_LEN] = 0;
-  efw[0][0] = fft[0];
-  efw[0][PART_LEN] = fft[1];
-  for (i = 1; i < PART_LEN; i++) {
-    efw[0][i] = fft[2 * i];
-    efw[1][i] = fft[2 * i + 1];
-  }
-
-  // Smoothed PSD
-  for (i = 0; i < PART_LEN1; i++) {
-    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
-                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
-    aec->se[i] = ptrGCoh[0] * aec->se[i] +
-                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
-    // We threshold here to protect against the ill-effects of a zero farend.
-    // The threshold is not arbitrarily chosen, but balances protection and
-    // adverse interaction with the algorithm's tuning.
-    // TODO: investigate further why this is so sensitive.
-    aec->sx[i] =
-        ptrGCoh[0] * aec->sx[i] +
-        ptrGCoh[1] *
-            WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);
-
-    aec->sde[i][0] =
-        ptrGCoh[0] * aec->sde[i][0] +
-        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
-    aec->sde[i][1] =
-        ptrGCoh[0] * aec->sde[i][1] +
-        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
-
-    aec->sxd[i][0] =
-        ptrGCoh[0] * aec->sxd[i][0] +
-        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
-    aec->sxd[i][1] =
-        ptrGCoh[0] * aec->sxd[i][1] +
-        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
-
-    sdSum += aec->sd[i];
-    seSum += aec->se[i];
-  }
-
-  // Divergent filter safeguard.
-  if (aec->divergeState == 0) {
-    if (seSum > sdSum) {
-      aec->divergeState = 1;
-    }
-  } else {
-    if (seSum * 1.05f < sdSum) {
-      aec->divergeState = 0;
-    }
-  }
-
-  if (aec->divergeState == 1) {
-    memcpy(efw, dfw, sizeof(efw));
-  }
-
-  if (!aec->extended_filter_enabled) {
-    // Reset if error is significantly larger than nearend (13 dB).
-    if (seSum > (19.95f * sdSum)) {
-      memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
-    }
-  }
-
-  // Subband coherence
-  for (i = 0; i < PART_LEN1; i++) {
-    cohde[i] =
-        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
-        (aec->sd[i] * aec->se[i] + 1e-10f);
-    cohxd[i] =
-        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
-        (aec->sx[i] * aec->sd[i] + 1e-10f);
-  }
+  WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);
 
   hNlXdAvg = 0;
   for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h
index 1c560f9..372b427 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h
@@ -170,4 +170,12 @@
                                          const float* lambda);
 extern WebRtcAec_ComfortNoise_t WebRtcAec_ComfortNoise;
 
+typedef void (*WebRtcAec_SubbandCoherence_t)(AecCore* aec,
+                                             float efw[2][PART_LEN1],
+                                             float xfw[2][PART_LEN1],
+                                             float* fft,
+                                             float* cohde,
+                                             float* cohxd);
+extern WebRtcAec_SubbandCoherence_t WebRtcAec_SubbandCoherence;
+
 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_