audio_processing/aec: Refactors NonLinearProcessing to prepare for NEON optimizations
Puts functionality necessary to calculate sub-band coherences into a function.
BUG=3131
TESTED=trybots
R=kwiberg@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/16789004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@6570 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index 207c6dc..2fd298c 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -415,11 +415,167 @@
}
}
+static int PartitionDelay(const AecCore* aec) {
+ // Measures the energy in each filter partition and returns the partition with
+ // highest energy.
+ // TODO(bjornv): Spread computational cost by computing one partition per
+ // block?
+ float wfEnMax = 0;
+ int i;
+ int delay = 0;
+
+ for (i = 0; i < aec->num_partitions; i++) {
+ int j;
+ int pos = i * PART_LEN1;
+ float wfEn = 0;
+ for (j = 0; j < PART_LEN1; j++) {
+ wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+ aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+ }
+
+ if (wfEn > wfEnMax) {
+ wfEnMax = wfEn;
+ delay = i;
+ }
+ }
+ return delay;
+}
+
+// Threshold to protect against the ill-effects of a zero far-end.
+static const float kMinFarendPSD = 15;
+
+// Updates the following smoothed Power Spectral Densities (PSD):
+// - sd : near-end
+// - se : residual echo
+// - sx : far-end
+// - sde : cross-PSD of near-end and residual echo
+// - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, also the filter diverge state is determined
+// upon actions are taken.
+static void SmoothedPSD(AecCore* aec,
+ float efw[2][PART_LEN1],
+ float dfw[2][PART_LEN1],
+ float xfw[2][PART_LEN1]) {
+ // Power estimate smoothing coefficients.
+ const float* ptrGCoh = aec->extended_filter_enabled
+ ? kExtendedSmoothingCoefficients[aec->mult - 1]
+ : kNormalSmoothingCoefficients[aec->mult - 1];
+ int i;
+ float sdSum = 0, seSum = 0;
+
+ for (i = 0; i < PART_LEN1; i++) {
+ aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+ ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+ aec->se[i] = ptrGCoh[0] * aec->se[i] +
+ ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+ // We threshold here to protect against the ill-effects of a zero farend.
+ // The threshold is not arbitrarily chosen, but balances protection and
+ // adverse interaction with the algorithm's tuning.
+ // TODO(bjornv): investigate further why this is so sensitive.
+ aec->sx[i] =
+ ptrGCoh[0] * aec->sx[i] +
+ ptrGCoh[1] * WEBRTC_SPL_MAX(
+ xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], kMinFarendPSD);
+
+ aec->sde[i][0] =
+ ptrGCoh[0] * aec->sde[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+ aec->sde[i][1] =
+ ptrGCoh[0] * aec->sde[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+ aec->sxd[i][0] =
+ ptrGCoh[0] * aec->sxd[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+ aec->sxd[i][1] =
+ ptrGCoh[0] * aec->sxd[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+ sdSum += aec->sd[i];
+ seSum += aec->se[i];
+ }
+
+ // Divergent filter safeguard.
+ aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+ if (aec->divergeState)
+ memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+ // Reset if error is significantly larger than nearend (13 dB).
+ if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+ memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+// Window time domain data to be used by the fft.
+__inline static void WindowData(float* x_windowed, const float* x) {
+ int i;
+ for (i = 0; i < PART_LEN; i++) {
+ x_windowed[i] = x[i] * sqrtHanning[i];
+ x_windowed[PART_LEN + i] = x[PART_LEN + i] * sqrtHanning[PART_LEN - i];
+ }
+}
+
+// Puts fft output data into a complex valued array.
+__inline static void StoreAsComplex(const float* data,
+ float data_complex[2][PART_LEN1]) {
+ int i;
+ data_complex[0][0] = data[0];
+ data_complex[1][0] = 0;
+ for (i = 1; i < PART_LEN; i++) {
+ data_complex[0][i] = data[2 * i];
+ data_complex[1][i] = data[2 * i + 1];
+ }
+ data_complex[0][PART_LEN] = data[1];
+ data_complex[1][PART_LEN] = 0;
+}
+
+static void SubbandCoherence(AecCore* aec,
+ float efw[2][PART_LEN1],
+ float xfw[2][PART_LEN1],
+ float* fft,
+ float* cohde,
+ float* cohxd) {
+ float dfw[2][PART_LEN1];
+ int i;
+
+ if (aec->delayEstCtr == 0)
+ aec->delayIdx = PartitionDelay(aec);
+
+ // Use delayed far.
+ memcpy(xfw,
+ aec->xfwBuf + aec->delayIdx * PART_LEN1,
+ sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+ // Windowed near fft
+ WindowData(fft, aec->dBuf);
+ aec_rdft_forward_128(fft);
+ StoreAsComplex(fft, dfw);
+
+ // Windowed error fft
+ WindowData(fft, aec->eBuf);
+ aec_rdft_forward_128(fft);
+ StoreAsComplex(fft, efw);
+
+ SmoothedPSD(aec, efw, dfw, xfw);
+
+ // Subband coherence
+ for (i = 0; i < PART_LEN1; i++) {
+ cohde[i] =
+ (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+ (aec->sd[i] * aec->se[i] + 1e-10f);
+ cohxd[i] =
+ (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+ (aec->sx[i] * aec->sd[i] + 1e-10f);
+ }
+}
+
WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
WebRtcAec_ComfortNoise_t WebRtcAec_ComfortNoise;
+WebRtcAec_SubbandCoherence_t WebRtcAec_SubbandCoherence;
int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
int i;
@@ -571,6 +727,7 @@
WebRtcAec_FilterAdaptation = FilterAdaptation;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
WebRtcAec_ComfortNoise = ComfortNoise;
+ WebRtcAec_SubbandCoherence = SubbandCoherence;
#if defined(WEBRTC_ARCH_X86_FAMILY)
if (WebRtc_GetCPUInfo(kSSE2)) {
@@ -1024,12 +1181,12 @@
}
static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) {
- float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
+ float efw[2][PART_LEN1], xfw[2][PART_LEN1];
complex_t comfortNoiseHband[PART_LEN1];
float fft[PART_LEN2];
float scale, dtmp;
float nlpGainHband;
- int i, j, pos;
+ int i;
// Coherence and non-linear filter
float cohde[PART_LEN1], cohxd[PART_LEN1];
@@ -1040,20 +1197,12 @@
const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
const int prefBandSize = kPrefBandSize / aec->mult;
const int minPrefBand = 4 / aec->mult;
-
- // Near and error power sums
- float sdSum = 0, seSum = 0;
-
// Power estimate smoothing coefficients.
- const float* ptrGCoh = aec->extended_filter_enabled
- ? kExtendedSmoothingCoefficients[aec->mult - 1]
- : kNormalSmoothingCoefficients[aec->mult - 1];
const float* min_overdrive = aec->extended_filter_enabled
? kExtendedMinOverDrive
: kNormalMinOverDrive;
// Filter energy
- float wfEnMax = 0, wfEn = 0;
const int delayEstInterval = 10 * aec->mult;
float* xfw_ptr = NULL;
@@ -1068,26 +1217,6 @@
nlpGainHband = (float)0.0;
dtmp = (float)0.0;
- // Measure energy in each filter partition to determine delay.
- // TODO: Spread by computing one partition per block?
- if (aec->delayEstCtr == 0) {
- wfEnMax = 0;
- aec->delayIdx = 0;
- for (i = 0; i < aec->num_partitions; i++) {
- pos = i * PART_LEN1;
- wfEn = 0;
- for (j = 0; j < PART_LEN1; j++) {
- wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
- aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
- }
-
- if (wfEn > wfEnMax) {
- wfEnMax = wfEn;
- aec->delayIdx = i;
- }
- }
- }
-
// We should always have at least one element stored in |far_buf|.
assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
// NLP
@@ -1098,104 +1227,7 @@
// Buffer far.
memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
- // Use delayed far.
- memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));
-
- // Windowed near fft
- for (i = 0; i < PART_LEN; i++) {
- fft[i] = aec->dBuf[i] * sqrtHanning[i];
- fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
- }
- aec_rdft_forward_128(fft);
-
- dfw[1][0] = 0;
- dfw[1][PART_LEN] = 0;
- dfw[0][0] = fft[0];
- dfw[0][PART_LEN] = fft[1];
- for (i = 1; i < PART_LEN; i++) {
- dfw[0][i] = fft[2 * i];
- dfw[1][i] = fft[2 * i + 1];
- }
-
- // Windowed error fft
- for (i = 0; i < PART_LEN; i++) {
- fft[i] = aec->eBuf[i] * sqrtHanning[i];
- fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
- }
- aec_rdft_forward_128(fft);
- efw[1][0] = 0;
- efw[1][PART_LEN] = 0;
- efw[0][0] = fft[0];
- efw[0][PART_LEN] = fft[1];
- for (i = 1; i < PART_LEN; i++) {
- efw[0][i] = fft[2 * i];
- efw[1][i] = fft[2 * i + 1];
- }
-
- // Smoothed PSD
- for (i = 0; i < PART_LEN1; i++) {
- aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
- ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
- aec->se[i] = ptrGCoh[0] * aec->se[i] +
- ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
- // We threshold here to protect against the ill-effects of a zero farend.
- // The threshold is not arbitrarily chosen, but balances protection and
- // adverse interaction with the algorithm's tuning.
- // TODO: investigate further why this is so sensitive.
- aec->sx[i] =
- ptrGCoh[0] * aec->sx[i] +
- ptrGCoh[1] *
- WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);
-
- aec->sde[i][0] =
- ptrGCoh[0] * aec->sde[i][0] +
- ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
- aec->sde[i][1] =
- ptrGCoh[0] * aec->sde[i][1] +
- ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
-
- aec->sxd[i][0] =
- ptrGCoh[0] * aec->sxd[i][0] +
- ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
- aec->sxd[i][1] =
- ptrGCoh[0] * aec->sxd[i][1] +
- ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
-
- sdSum += aec->sd[i];
- seSum += aec->se[i];
- }
-
- // Divergent filter safeguard.
- if (aec->divergeState == 0) {
- if (seSum > sdSum) {
- aec->divergeState = 1;
- }
- } else {
- if (seSum * 1.05f < sdSum) {
- aec->divergeState = 0;
- }
- }
-
- if (aec->divergeState == 1) {
- memcpy(efw, dfw, sizeof(efw));
- }
-
- if (!aec->extended_filter_enabled) {
- // Reset if error is significantly larger than nearend (13 dB).
- if (seSum > (19.95f * sdSum)) {
- memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
- }
- }
-
- // Subband coherence
- for (i = 0; i < PART_LEN1; i++) {
- cohde[i] =
- (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
- (aec->sd[i] * aec->se[i] + 1e-10f);
- cohxd[i] =
- (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
- (aec->sx[i] * aec->sd[i] + 1e-10f);
- }
+ WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);
hNlXdAvg = 0;
for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h
index 1c560f9..372b427 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h
@@ -170,4 +170,12 @@
const float* lambda);
extern WebRtcAec_ComfortNoise_t WebRtcAec_ComfortNoise;
+typedef void (*WebRtcAec_SubbandCoherence_t)(AecCore* aec,
+ float efw[2][PART_LEN1],
+ float xfw[2][PART_LEN1],
+ float* fft,
+ float* cohde,
+ float* cohxd);
+extern WebRtcAec_SubbandCoherence_t WebRtcAec_SubbandCoherence;
+
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_