Reland "Revert "audio_processing/aec: make delay estimator aware of starving farend buffer""

Original review at https://codereview.webrtc.org/1180423006

SystemDelayTests was not updated w.r.t. extended_filter mode and some tests were disabled on Android since DA-AEC is automatically set.
All tests have now been updated for both extended_filter mode as well as DA-AEC, hence are now enabled on Android.

Also
* Moves default settings of extended_filter and DA-AEC form Init() to Create() to avoid unintentional loss of state during a reset.
* Fixes a potential bug of starting from scratch in extended_filter mode + DA-AEC.

This reverts commit 01c9b012e9171c813ace9e405c32fc75f4262bf6.

BUG=
R=henrik.lundin@webrtc.org

Review URL: https://codereview.webrtc.org/1187943005.

Cr-Commit-Position: refs/heads/master@{#9458}
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index 16e3389..a3f9f24 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -857,6 +857,14 @@
   }
 }
 
+static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) {
+  WebRtc_MoveReadPtr(self->far_buf_windowed, elements);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_MoveReadPtr(self->far_time_buf, elements);
+#endif
+  return WebRtc_MoveReadPtr(self->far_buf, elements);
+}
+
 static int SignalBasedDelayCorrection(AecCore* self) {
   int delay_correction = 0;
   int last_delay = -2;
@@ -897,9 +905,13 @@
     const int do_correction = delay <= lower_bound || delay > upper_bound;
     if (do_correction == 1) {
       int available_read = (int)WebRtc_available_read(self->far_buf);
-      // Adjust w.r.t. a |shift_offset| to account for not as reliable estimates
-      // in the beginning, hence we are more conservative.
-      delay_correction = -(delay - self->shift_offset);
+      // With |shift_offset| we gradually rely on the delay estimates.  For
+      // positive delays we reduce the correction by |shift_offset| to lower the
+      // risk of pushing the AEC into a non causal state.  For negative delays
+      // we rely on the values up to a rounding error, hence compensate by 1
+      // element to make sure to push the delay into the causal region.
+      delay_correction = -delay;
+      delay_correction += delay > self->shift_offset ? self->shift_offset : 1;
       self->shift_offset--;
       self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset);
       if (delay_correction > available_read - self->mult - 1) {
@@ -1440,12 +1452,15 @@
     return NULL;
   }
 #ifdef WEBRTC_ANDROID
+  aec->reported_delay_enabled = 0;  // DA-AEC enabled by default.
   // DA-AEC assumes the system is causal from the beginning and will self adjust
   // the lookahead when shifting is required.
   WebRtc_set_lookahead(aec->delay_estimator, 0);
 #else
+  aec->reported_delay_enabled = 1;
   WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks);
 #endif
+  aec->extended_filter_enabled = 0;
 
   // Assembly optimization
   WebRtcAec_FilterFar = FilterFar;
@@ -1592,12 +1607,6 @@
   aec->shift_offset = kInitialShiftOffset;
   aec->delay_quality_threshold = kDelayQualityThresholdMin;
 
-#ifdef WEBRTC_ANDROID
-  aec->reported_delay_enabled = 0;  // Disabled by default.
-#else
-  aec->reported_delay_enabled = 1;
-#endif
-  aec->extended_filter_enabled = 0;
   aec->num_partitions = kNormalNumPartitions;
 
   // Update the delay estimator with filter length.  We use half the
@@ -1715,11 +1724,7 @@
 }
 
 int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
-  int elements_moved = WebRtc_MoveReadPtr(aec->far_buf_windowed, elements);
-  WebRtc_MoveReadPtr(aec->far_buf, elements);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  WebRtc_MoveReadPtr(aec->far_time_buf, elements);
-#endif
+  int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements);
   aec->system_delay -= elements_moved * PART_LEN;
   return elements_moved;
 }
@@ -1792,42 +1797,27 @@
       // which should be investigated. Maybe, allow for a non-symmetric
       // rounding, like -16.
       int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
-      int moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
-      WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
+      int moved_elements =
+          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
       aec->knownDelay -= moved_elements * PART_LEN;
-  #ifdef WEBRTC_AEC_DEBUG_DUMP
-      WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
-  #endif
     } else {
       // 2 b) Apply signal based delay correction.
       int move_elements = SignalBasedDelayCorrection(aec);
-      int moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
-      WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
-  #ifdef WEBRTC_AEC_DEBUG_DUMP
-      WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
-  #endif
+      int moved_elements =
+          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+      int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) -
+          WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
       WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
       WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
                                            moved_elements);
       aec->signal_delay_correction += moved_elements;
-      // TODO(bjornv): Investigate if this is reasonable.  I had to add this
-      // guard when the signal based delay correction replaces the system based
-      // one. Otherwise there was a buffer underrun in the "qa-new/01/"
-      // recording when adding 44 ms extra delay.  This was not seen if we kept
-      // both delay correction algorithms running in parallel.
-      // A first investigation showed that we have a drift in this case that
-      // causes the buffer underrun.  Compared to when delay correction was
-      // turned off, we get buffer underrun as well which was triggered in 1)
-      // above.  In addition there was a shift in |knownDelay| later increasing
-      // the buffer.  When running in parallel, this if statement was not
-      // triggered.  This suggests two alternatives; (a) use both algorithms, or
-      // (b) allow for smaller delay corrections when we operate close to the
-      // buffer limit.  At the time of testing we required a change of 6 blocks,
-      // but could change it to, e.g., 2 blocks. It requires some testing
-      // though.
-      if ((int)WebRtc_available_read(aec->far_buf) < (aec->mult + 1)) {
-        // We don't have enough data so we stuff the far-end buffers.
-        WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
+      // If we rely on reported system delay values only, a buffer underrun here
+      // can never occur since we've taken care of that in 1) above.  Here, we
+      // apply signal based delay correction and can therefore end up with
+      // buffer underruns since the delay estimation can be wrong.  We therefore
+      // stuff the buffer with enough elements if needed.
+      if (far_near_buffer_diff < 0) {
+        WebRtcAec_MoveFarReadPtr(aec, far_near_buffer_diff);
       }
     }
 
diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c
index a39fd2c..5dfe7ad 100644
--- a/webrtc/modules/audio_processing/aec/echo_cancellation.c
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c
@@ -235,7 +235,10 @@
   aecpc->checkBuffSize = 1;
   aecpc->firstVal = 0;
 
-  aecpc->startup_phase = WebRtcAec_reported_delay_enabled(aecpc->aec);
+  // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled,
+  // but not extended_filter mode.
+  aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) ||
+      WebRtcAec_reported_delay_enabled(aecpc->aec);
   aecpc->bufSizeStart = 0;
   aecpc->checkBufSizeCtr = 0;
   aecpc->msInSndCardBuf = 0;
@@ -718,9 +721,7 @@
     }
   } else {
     // AEC is enabled.
-    if (WebRtcAec_reported_delay_enabled(aecpc->aec)) {
-      EstBufDelayNormal(aecpc);
-    }
+    EstBufDelayNormal(aecpc);
 
     // Call the AEC.
     // TODO(bjornv): Re-structure such that we don't have to pass
@@ -782,12 +783,13 @@
     // measurement.
     int startup_size_ms =
         reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms;
+#if defined(WEBRTC_ANDROID)
     int target_delay = startup_size_ms * self->rate_factor * 8;
-#if !defined(WEBRTC_ANDROID)
+#else
     // To avoid putting the AEC in a non-causal state we're being slightly
     // conservative and scale by 2. On Android we use a fixed delay and
     // therefore there is no need to scale the target_delay.
-    target_delay /= 2;
+    int target_delay = startup_size_ms * self->rate_factor * 8 / 2;
 #endif
     int overhead_elements =
         (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN;
@@ -795,9 +797,7 @@
     self->startup_phase = 0;
   }
 
-  if (WebRtcAec_reported_delay_enabled(self->aec)) {
-    EstBufDelayExtended(self);
-  }
+  EstBufDelayExtended(self);
 
   {
     // |delay_diff_offset| gives us the option to manually rewind the delay on
diff --git a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
index 42800c5..31401d4 100644
--- a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
+++ b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
@@ -9,7 +9,6 @@
  */
 
 #include "testing/gtest/include/gtest/gtest.h"
-#include "webrtc/base/checks.h"
 extern "C" {
 #include "webrtc/modules/audio_processing/aec/aec_core.h"
 }
@@ -41,7 +40,7 @@
 
   // Maps buffer size in ms into samples, taking the unprocessed frame into
   // account.
-  int MapBufferSizeToSamples(int size_in_ms);
+  int MapBufferSizeToSamples(int size_in_ms, bool extended_filter);
 
   void* handle_;
   Aec* self_;
@@ -100,6 +99,7 @@
 void SystemDelayTest::Init(int sample_rate_hz) {
   // Initialize AEC
   EXPECT_EQ(0, WebRtcAec_Init(handle_, sample_rate_hz, 48000));
+  EXPECT_EQ(0, WebRtcAec_system_delay(self_->aec));
 
   // One frame equals 10 ms of data.
   samples_per_frame_ = sample_rate_hz / 100;
@@ -135,26 +135,38 @@
   // up the far-end buffer with the same amount as we will report in through
   // Process().
   int buffer_size = BufferFillUp();
-  // A stable device should be accepted and put in a regular process mode within
-  // |kStableConvergenceMs|.
-  int process_time_ms = 0;
-  for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+
+  if (WebRtcAec_reported_delay_enabled(self_->aec) == 0) {
+    // In extended_filter mode we set the buffer size after the first processed
+    // 10 ms chunk. Hence, we don't need to wait for the reported system delay
+    // values to become stable.
     RenderAndCapture(kDeviceBufMs);
     buffer_size += samples_per_frame_;
-    if (self_->startup_phase == 0) {
-      // We have left the startup phase.
-      break;
+    EXPECT_EQ(0, self_->startup_phase);
+  } else {
+    // A stable device should be accepted and put in a regular process mode
+    // within |kStableConvergenceMs|.
+    int process_time_ms = 0;
+    for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+      RenderAndCapture(kDeviceBufMs);
+      buffer_size += samples_per_frame_;
+      if (self_->startup_phase == 0) {
+        // We have left the startup phase.
+        break;
+      }
     }
+    // Verify convergence time.
+    EXPECT_GT(kStableConvergenceMs, process_time_ms);
   }
-  // Verify convergence time.
-  EXPECT_GT(kStableConvergenceMs, process_time_ms);
   // Verify that the buffer has been flushed.
   EXPECT_GE(buffer_size, WebRtcAec_system_delay(self_->aec));
 }
 
-int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms) {
-  // The extra 10 ms corresponds to the unprocessed frame.
-  return (size_in_ms + 10) * samples_per_frame_ / 10;
+  int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms,
+                                              bool extended_filter) {
+  // If extended_filter is disabled we add an extra 10 ms for the unprocessed
+  // frame. That is simply how the algorithm is constructed.
+  return (size_in_ms + (extended_filter ? 0 : 10)) * samples_per_frame_ / 10;
 }
 
 // The tests should meet basic requirements and not be adjusted to what is
@@ -181,14 +193,23 @@
 TEST_F(SystemDelayTest, CorrectIncreaseWhenBufferFarend) {
   // When we add data to the AEC buffer the internal system delay should be
   // incremented with the same amount as the size of data.
-  for (size_t i = 0; i < kNumSampleRates; i++) {
-    Init(kSampleRateHz[i]);
-
-    // Loop through a couple of calls to make sure the system delay increments
-    // correctly.
-    for (int j = 1; j <= 5; j++) {
-      EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
-      EXPECT_EQ(j * samples_per_frame_, WebRtcAec_system_delay(self_->aec));
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_reported_delay(self_->aec, 1 - da_aec);
+      EXPECT_EQ(1 - da_aec, WebRtcAec_reported_delay_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        // Loop through a couple of calls to make sure the system delay
+        // increments correctly.
+        for (int j = 1; j <= 5; j++) {
+          EXPECT_EQ(0,
+                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+          EXPECT_EQ(j * samples_per_frame_, WebRtcAec_system_delay(self_->aec));
+        }
+      }
     }
   }
 }
@@ -199,21 +220,42 @@
 TEST_F(SystemDelayTest, CorrectDelayAfterStableStartup) {
   // We run the system in a stable startup. After that we verify that the system
   // delay meets the requirements.
-  for (size_t i = 0; i < kNumSampleRates; i++) {
-    Init(kSampleRateHz[i]);
-    RunStableStartup();
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_reported_delay(self_->aec, 1 - da_aec);
+      EXPECT_EQ(1 - da_aec, WebRtcAec_reported_delay_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
 
-    // Verify system delay with respect to requirements, i.e., the
-    // |system_delay| is in the interval [75%, 100%] of what's reported on the
-    // average.
-    int average_reported_delay = kDeviceBufMs * samples_per_frame_ / 10;
-    EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec));
-    EXPECT_LE(average_reported_delay * 3 / 4,
-              WebRtcAec_system_delay(self_->aec));
+        // Verify system delay with respect to requirements, i.e., the
+        // |system_delay| is in the interval [75%, 100%] of what's reported on
+        // the average.
+        // In extended_filter mode we target 50% and measure after one processed
+        // 10 ms chunk.
+        int average_reported_delay = kDeviceBufMs * samples_per_frame_ / 10;
+        EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec));
+        int lower_bound = WebRtcAec_extended_filter_enabled(self_->aec)
+                              ? average_reported_delay / 2 - samples_per_frame_
+                              : average_reported_delay * 3 / 4;
+        EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec));
+      }
+    }
   }
 }
 
 TEST_F(SystemDelayTest, CorrectDelayAfterUnstableStartup) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // the first 10 ms chunk to determine a reasonable buffer size. Neither does
+  // it apply if DA-AEC is on because that overrides the startup procedure.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_reported_delay(self_->aec, 1);
+  EXPECT_EQ(1, WebRtcAec_reported_delay_enabled(self_->aec));
+
   // In an unstable system we would start processing after |kMaxConvergenceMs|.
   // On the last frame the AEC buffer is adjusted to 60% of the last reported
   // device buffer size.
@@ -254,15 +296,19 @@
   }
 }
 
-TEST_F(SystemDelayTest,
-       DISABLED_ON_ANDROID(CorrectDelayAfterStableBufferBuildUp)) {
+TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // the first 10 ms chunk to determine a reasonable buffer size. Neither does
+  // it apply if DA-AEC is on because that overrides the startup procedure.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_reported_delay(self_->aec, 1);
+  EXPECT_EQ(1, WebRtcAec_reported_delay_enabled(self_->aec));
+
   // In this test we start by establishing the device buffer size during stable
   // conditions, but with an empty internal far-end buffer. Once that is done we
   // verify that the system delay is increased correctly until we have reach an
   // internal buffer size of 75% of what's been reported.
-
-  // This test assumes the reported delays are used.
-  WebRtcAec_enable_reported_delay(WebRtcAec_aec_core(handle_), 1);
   for (size_t i = 0; i < kNumSampleRates; i++) {
     Init(kSampleRateHz[i]);
 
@@ -316,62 +362,73 @@
   // WebRtcAec_Process() we will finally run out of data, but should
   // automatically stuff the buffer. We verify this behavior by checking if the
   // system delay goes negative.
-  for (size_t i = 0; i < kNumSampleRates; i++) {
-    Init(kSampleRateHz[i]);
-    RunStableStartup();
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_reported_delay(self_->aec, 1 - da_aec);
+      EXPECT_EQ(1 - da_aec, WebRtcAec_reported_delay_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
 
-    // The AEC has now left the Startup phase. We now have at most
-    // |kStableConvergenceMs| in the buffer. Keep on calling Process() until
-    // we run out of data and verify that the system delay is non-negative.
-    for (int j = 0; j <= kStableConvergenceMs; j += 10) {
-      EXPECT_EQ(0,
-                WebRtcAec_Process(handle_,
-                                  &near_ptr_,
-                                  1,
-                                  &out_ptr_,
-                                  samples_per_frame_,
-                                  kDeviceBufMs,
-                                  0));
-      EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        // The AEC has now left the Startup phase. We now have at most
+        // |kStableConvergenceMs| in the buffer. Keep on calling Process() until
+        // we run out of data and verify that the system delay is non-negative.
+        for (int j = 0; j <= kStableConvergenceMs; j += 10) {
+          EXPECT_EQ(0, WebRtcAec_Process(handle_, &near_ptr_, 1, &out_ptr_,
+                                         samples_per_frame_, kDeviceBufMs, 0));
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+      }
     }
   }
 }
 
-TEST_F(SystemDelayTest, DISABLED_ON_ANDROID(CorrectDelayDuringDrift)) {
+TEST_F(SystemDelayTest, CorrectDelayDuringDrift) {
   // This drift test should verify that the system delay is never exceeding the
   // device buffer. The drift is simulated by decreasing the reported device
   // buffer size by 1 ms every 100 ms. If the device buffer size goes below 30
   // ms we jump (add) 10 ms to give a repeated pattern.
 
-  // This test assumes the reported delays are used.
-  WebRtcAec_enable_reported_delay(WebRtcAec_aec_core(handle_), 1);
-  for (size_t i = 0; i < kNumSampleRates; i++) {
-    Init(kSampleRateHz[i]);
-    RunStableStartup();
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_reported_delay(self_->aec, 1 - da_aec);
+      EXPECT_EQ(1 - da_aec, WebRtcAec_reported_delay_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
 
-    // We have now left the startup phase and proceed with normal processing.
-    int jump = 0;
-    for (int j = 0; j < 1000; j++) {
-      // Drift = -1 ms per 100 ms of data.
-      int device_buf_ms = kDeviceBufMs - (j / 10) + jump;
-      int device_buf = MapBufferSizeToSamples(device_buf_ms);
+        // We have left the startup phase and proceed with normal processing.
+        int jump = 0;
+        for (int j = 0; j < 1000; j++) {
+          // Drift = -1 ms per 100 ms of data.
+          int device_buf_ms = kDeviceBufMs - (j / 10) + jump;
+          int device_buf = MapBufferSizeToSamples(device_buf_ms,
+                                                  extended_filter == 1);
 
-      if (device_buf_ms < 30) {
-        // Add 10 ms data, taking affect next frame.
-        jump += 10;
+          if (device_buf_ms < 30) {
+            // Add 10 ms data, taking affect next frame.
+            jump += 10;
+          }
+          RenderAndCapture(device_buf_ms);
+
+          // Verify that the system delay does not exceed the device buffer.
+          EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec));
+
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
       }
-      RenderAndCapture(device_buf_ms);
-
-      // Verify that the system delay does not exceed the device buffer.
-      EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec));
-
-      // Verify that the system delay is non-negative.
-      EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
     }
   }
 }
 
-TEST_F(SystemDelayTest, DISABLED_ON_ANDROID(ShouldRecoverAfterGlitch)) {
+TEST_F(SystemDelayTest, ShouldRecoverAfterGlitch) {
   // This glitch test should verify that the system delay recovers if there is
   // a glitch in data. The data glitch is constructed as 200 ms of buffering
   // after which the stable procedure continues. The glitch is never reported by
@@ -379,79 +436,100 @@
   // The system is said to be in a non-causal state if the difference between
   // the device buffer and system delay is less than a block (64 samples).
 
-  // This test assumes the reported delays are used.
-  WebRtcAec_enable_reported_delay(WebRtcAec_aec_core(handle_), 1);
-  for (size_t i = 0; i < kNumSampleRates; i++) {
-    Init(kSampleRateHz[i]);
-    RunStableStartup();
-    int device_buf = MapBufferSizeToSamples(kDeviceBufMs);
-    // Glitch state.
-    for (int j = 0; j < 20; j++) {
-      EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
-      // No need to verify system delay, since that is done in a separate test.
-    }
-    // Verify that we are in a non-causal state, i.e.,
-    // |system_delay| > |device_buf|.
-    EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec));
-
-    // Recover state. Should recover at least 4 ms of data per 10 ms, hence a
-    // glitch of 200 ms will take at most 200 * 10 / 4 = 500 ms to recover from.
-    bool non_causal = true;  // We are currently in a non-causal state.
-    for (int j = 0; j < 50; j++) {
-      int system_delay_before = WebRtcAec_system_delay(self_->aec);
-      RenderAndCapture(kDeviceBufMs);
-      int system_delay_after = WebRtcAec_system_delay(self_->aec);
-
-      // We have recovered if |device_buf| - |system_delay_after| >= 64 (one
-      // block). During recovery |system_delay_after| < |system_delay_before|,
-      // otherwise they are equal.
-      if (non_causal) {
-        EXPECT_LT(system_delay_after, system_delay_before);
-        if (device_buf - system_delay_after >= 64) {
-          non_causal = false;
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_reported_delay(self_->aec, 1 - da_aec);
+      EXPECT_EQ(1 - da_aec, WebRtcAec_reported_delay_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+        int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                                extended_filter == 1);
+        // Glitch state.
+        for (int j = 0; j < 20; j++) {
+          EXPECT_EQ(0,
+                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+          // No need to verify system delay, since that is done in a separate
+          // test.
         }
-      } else {
-        EXPECT_EQ(system_delay_before, system_delay_after);
+        // Verify that we are in a non-causal state, i.e.,
+        // |system_delay| > |device_buf|.
+        EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec));
+
+        // Recover state. Should recover at least 4 ms of data per 10 ms, hence
+        // a glitch of 200 ms will take at most 200 * 10 / 4 = 500 ms to recover
+        // from.
+        bool non_causal = true;  // We are currently in a non-causal state.
+        for (int j = 0; j < 50; j++) {
+          int system_delay_before = WebRtcAec_system_delay(self_->aec);
+          RenderAndCapture(kDeviceBufMs);
+          int system_delay_after = WebRtcAec_system_delay(self_->aec);
+          // We have recovered if
+          // |device_buf| - |system_delay_after| >= PART_LEN (1 block).
+          // During recovery, |system_delay_after| < |system_delay_before|,
+          // otherwise they are equal.
+          if (non_causal) {
+            EXPECT_LT(system_delay_after, system_delay_before);
+            if (device_buf - system_delay_after >= PART_LEN) {
+              non_causal = false;
+            }
+          } else {
+            EXPECT_EQ(system_delay_before, system_delay_after);
+          }
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+        // Check that we have recovered.
+        EXPECT_FALSE(non_causal);
       }
-      // Verify that the system delay is non-negative.
-      EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
     }
-    // Check that we have recovered.
-    EXPECT_FALSE(non_causal);
   }
 }
 
 TEST_F(SystemDelayTest, UnaffectedWhenSpuriousDeviceBufferValues) {
-  // This spurious device buffer data test aims at verifying that the system
-  // delay is unaffected by large outliers.
-  // The system is said to be in a non-causal state if the difference between
-  // the device buffer and system delay is less than a block (64 samples).
-  for (size_t i = 0; i < kNumSampleRates; i++) {
-    Init(kSampleRateHz[i]);
-    RunStableStartup();
-    int device_buf = MapBufferSizeToSamples(kDeviceBufMs);
+  // This test does not apply in extended_filter mode, since we only use the
+  // the first 10 ms chunk to determine a reasonable buffer size.
+  const int extended_filter = 0;
+  WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+  EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
 
-    // Normal state. We are currently not in a non-causal state.
-    bool non_causal = false;
+  // Should be DA-AEC independent.
+  for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+    WebRtcAec_enable_reported_delay(self_->aec, 1 - da_aec);
+    EXPECT_EQ(1 - da_aec, WebRtcAec_reported_delay_enabled(self_->aec));
+    // This spurious device buffer data test aims at verifying that the system
+    // delay is unaffected by large outliers.
+    // The system is said to be in a non-causal state if the difference between
+    // the device buffer and system delay is less than a block (64 samples).
+    for (size_t i = 0; i < kNumSampleRates; i++) {
+      Init(kSampleRateHz[i]);
+      RunStableStartup();
+      int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                              extended_filter == 1);
 
-    // Run 1 s and replace device buffer size with 500 ms every 100 ms.
-    for (int j = 0; j < 100; j++) {
-      int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
-      int device_buf_ms = kDeviceBufMs;
-      if (j % 10 == 0) {
-        device_buf_ms = 500;
+      // Normal state. We are currently not in a non-causal state.
+      bool non_causal = false;
+
+      // Run 1 s and replace device buffer size with 500 ms every 100 ms.
+      for (int j = 0; j < 100; j++) {
+        int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+        int device_buf_ms = j % 10 == 0 ? 500 : kDeviceBufMs;
+        RenderAndCapture(device_buf_ms);
+
+        // Check for non-causality.
+        if (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN) {
+          non_causal = true;
+        }
+        EXPECT_FALSE(non_causal);
+        EXPECT_EQ(system_delay_before_calls,
+                  WebRtcAec_system_delay(self_->aec));
+
+        // Verify that the system delay is non-negative.
+        EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
       }
-      RenderAndCapture(device_buf_ms);
-
-      // Check for non-causality.
-      if (device_buf - WebRtcAec_system_delay(self_->aec) < 64) {
-        non_causal = true;
-      }
-      EXPECT_FALSE(non_causal);
-      EXPECT_EQ(system_delay_before_calls, WebRtcAec_system_delay(self_->aec));
-
-      // Verify that the system delay is non-negative.
-      EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
     }
   }
 }
@@ -462,36 +540,54 @@
   // The test is constructed such that every other device buffer value is zero
   // and then 2 * |kDeviceBufMs|, hence the size is constant on the average. The
   // zero values will force us into a non-causal state and thereby lowering the
-  // system delay until we basically runs out of data. Once that happens the
+  // system delay until we basically run out of data. Once that happens the
   // buffer will be stuffed.
   // TODO(bjornv): This test will have a better impact if we verified that the
-  // delay estimate goes up when the system delay goes done to meet the average
+  // delay estimate goes up when the system delay goes down to meet the average
   // device buffer size.
-  for (size_t i = 0; i < kNumSampleRates; i++) {
-    Init(kSampleRateHz[i]);
-    RunStableStartup();
-    int device_buf = MapBufferSizeToSamples(kDeviceBufMs);
 
-    // Normal state. We are currently not in a non-causal state.
-    bool non_causal = false;
+  // This test does not apply if DA-AEC is enabled and extended_filter mode
+  // disabled.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_reported_delay(self_->aec, 1 - da_aec);
+      EXPECT_EQ(1 - da_aec, WebRtcAec_reported_delay_enabled(self_->aec));
+      if (extended_filter == 0 && da_aec == 1) {
+        continue;
+      }
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+        const int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                                      extended_filter == 1);
 
-    // Loop through 100 frames (both render and capture), which equals 1 s of
-    // data. Every odd frame we set the device buffer size to 2 * |kDeviceBufMs|
-    // and even frames we set the device buffer size to zero.
-    for (int j = 0; j < 100; j++) {
-      int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
-      int device_buf_ms = 2 * (j % 2) * kDeviceBufMs;
-      RenderAndCapture(device_buf_ms);
+        // Normal state. We are currently not in a non-causal state.
+        bool non_causal = false;
 
-      // Check for non-causality, compared with the average device buffer size.
-      non_causal |= (device_buf - WebRtcAec_system_delay(self_->aec) < 64);
-      EXPECT_GE(system_delay_before_calls, WebRtcAec_system_delay(self_->aec));
+        // Loop through 100 frames (both render and capture), which equals 1 s
+        // of data. Every odd frame we set the device buffer size to
+        // 2 * |kDeviceBufMs| and even frames we set the device buffer size to
+        // zero.
+        for (int j = 0; j < 100; j++) {
+          int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+          int device_buf_ms = 2 * (j % 2) * kDeviceBufMs;
+          RenderAndCapture(device_buf_ms);
 
-      // Verify that the system delay is non-negative.
-      EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+          // Check for non-causality, compared with the average device buffer
+          // size.
+          non_causal |= (device_buf - WebRtcAec_system_delay(self_->aec) < 64);
+          EXPECT_GE(system_delay_before_calls,
+                    WebRtcAec_system_delay(self_->aec));
+
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+        // Verify we are not in a non-causal state.
+        EXPECT_FALSE(non_causal);
+      }
     }
-    // Verify we are not in a non-causal state.
-    EXPECT_FALSE(non_causal);
   }
 }