AGC2 adaptive digital dry run mode Add the option to run the adaptive digital controller of AGC2 without side-effects - i.e., no gain applied. Tested: adaptation verified during a video call in Chromium Bug: webrtc:7494 Change-Id: I4776f6012907d76a17a3bca89991da97dc38657f Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/215964 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#33875}

commit: d66a60597de5db028172803abc273bff5075239e [log] [tgz]
author: Alessio Bazzica <alessiob@webrtc.org> Thu Apr 29 16:13:25 2021 +0200
committer: WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com> Thu Apr 29 16:05:57 2021 +0000
tree: 9756c4bdd3c65f6dcb37a50780224b20712187a6
parent: 141a4de0727cb2eba7d21e896e4ecf72b5be1502 [diff]
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 8bf192e..3fc9008 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc

@@ -25,10 +25,6 @@
 using NoiseEstimatorType =
     AudioProcessing::Config::GainController2::NoiseEstimator;
 
-constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
-constexpr float kMaxGainChangePerSecondDb = 3.0f;
-constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
-
 // Detects the available CPU features and applies any kill-switches.
 AvailableCpuFeatures GetAllowedCpuFeatures(
     const AdaptiveDigitalConfig& config) {
@@ -56,29 +52,8 @@
   }
 }
 
-constexpr NoiseEstimatorType kDefaultNoiseLevelEstimatorType =
-    NoiseEstimatorType::kNoiseFloor;
-
 }  // namespace
 
-AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
-    : speech_level_estimator_(apm_data_dumper),
-      gain_controller_(apm_data_dumper,
-                       kGainApplierAdjacentSpeechFramesThreshold,
-                       kMaxGainChangePerSecondDb,
-                       kMaxOutputNoiseLevelDbfs),
-      apm_data_dumper_(apm_data_dumper),
-      noise_level_estimator_(
-          CreateNoiseLevelEstimator(kDefaultNoiseLevelEstimatorType,
-                                    apm_data_dumper)),
-      saturation_protector_(
-          CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
-                                    kSaturationProtectorExtraHeadroomDb,
-                                    kGainApplierAdjacentSpeechFramesThreshold,
-                                    apm_data_dumper)) {
-  RTC_DCHECK(apm_data_dumper);
-}
-
 AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
                          const AdaptiveDigitalConfig& config)
     : speech_level_estimator_(apm_data_dumper,
@@ -87,7 +62,8 @@
       gain_controller_(apm_data_dumper,
                        config.adjacent_speech_frames_threshold,
                        config.max_gain_change_db_per_second,
-                       config.max_output_noise_level_dbfs),
+                       config.max_output_noise_level_dbfs,
+                       config.dry_run),
       apm_data_dumper_(apm_data_dumper),
       noise_level_estimator_(
           CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)),
@@ -106,6 +82,10 @@
 
 AdaptiveAgc::~AdaptiveAgc() = default;
 
+void AdaptiveAgc::Initialize(int sample_rate_hz, int num_channels) {
+  gain_controller_.Initialize(sample_rate_hz, num_channels);
+}
+
 void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) {
   AdaptiveDigitalGainApplier::FrameInfo info;
 

diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h
index fe81444..43c7787 100644
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h

@@ -25,19 +25,21 @@
 class ApmDataDumper;
 
 // Adaptive digital gain controller.
-// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`.
+// TODO(crbug.com/webrtc/7494): Rename to `AdaptiveDigitalGainController`.
 class AdaptiveAgc {
  public:
-  explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
-  // TODO(crbug.com/webrtc/7494): Remove ctor above.
   AdaptiveAgc(
       ApmDataDumper* apm_data_dumper,
       const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
   ~AdaptiveAgc();
 
+  void Initialize(int sample_rate_hz, int num_channels);
+
+  // TODO(crbug.com/webrtc/7494): Add `SetLimiterEnvelope()`.
+
   // Analyzes `frame` and applies a digital adaptive gain to it. Takes into
   // account the envelope measured by the limiter.
-  // TODO(crbug.com/webrtc/7494): Make the class depend on the limiter.
+  // TODO(crbug.com/webrtc/7494): Remove `limiter_envelope`.
   void Process(AudioFrameView<float> frame, float limiter_envelope);
 
   // Handles a gain change applied to the input signal (e.g., analog gain).

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
index 8a8a7fd..e59b110 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc

@@ -92,13 +92,28 @@
                         max_gain_increase_db);
 }
 
+// Copies the (multichannel) audio samples from `src` into `dst`.
+void CopyAudio(AudioFrameView<const float> src,
+               std::vector<std::vector<float>>& dst) {
+  RTC_DCHECK_GT(src.num_channels(), 0);
+  RTC_DCHECK_GT(src.samples_per_channel(), 0);
+  RTC_DCHECK_EQ(dst.size(), src.num_channels());
+  for (size_t c = 0; c < src.num_channels(); ++c) {
+    rtc::ArrayView<const float> channel_view = src.channel(c);
+    RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
+    RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
+    std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
+  }
+}
+
 }  // namespace
 
 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
     ApmDataDumper* apm_data_dumper,
     int adjacent_speech_frames_threshold,
     float max_gain_change_db_per_second,
-    float max_output_noise_level_dbfs)
+    float max_output_noise_level_dbfs,
+    bool dry_run)
     : apm_data_dumper_(apm_data_dumper),
       gain_applier_(
           /*hard_clip_samples=*/false,
@@ -107,13 +122,39 @@
       max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
                                    kFrameDurationMs / 1000.f),
       max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
+      dry_run_(dry_run),
       calls_since_last_gain_log_(0),
       frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
       last_gain_db_(kInitialAdaptiveDigitalGainDb) {
-  RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
+  RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f);
   RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
-  RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f);
-  RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f);
+  RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f);
+  RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f);
+  Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1);
+}
+
+void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
+                                            int num_channels) {
+  if (!dry_run_) {
+    return;
+  }
+  RTC_DCHECK_GT(sample_rate_hz, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
+  bool sample_rate_changed =
+      dry_run_frame_.empty() ||  // Handle initialization.
+      dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
+  bool num_channels_changed =
+      dry_run_channels_.size() != static_cast<size_t>(num_channels);
+  if (sample_rate_changed || num_channels_changed) {
+    // Resize the multichannel audio vector and update the channel pointers.
+    dry_run_frame_.resize(num_channels);
+    dry_run_channels_.resize(num_channels);
+    for (int c = 0; c < num_channels; ++c) {
+      dry_run_frame_[c].resize(frame_size);
+      dry_run_channels_[c] = dry_run_frame_[c].data();
+    }
+  }
 }
 
 void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
@@ -174,7 +215,19 @@
     gain_applier_.SetGainFactor(
         DbToRatio(last_gain_db_ + gain_change_this_frame_db));
   }
-  gain_applier_.ApplyGain(frame);
+
+  // Modify `frame` only if not running in "dry run" mode.
+  if (!dry_run_) {
+    gain_applier_.ApplyGain(frame);
+  } else {
+    // Copy `frame` so that `ApplyGain()` is called (on a copy).
+    CopyAudio(frame, dry_run_frame_);
+    RTC_DCHECK(!dry_run_channels_.empty());
+    AudioFrameView<float> frame_copy(&dry_run_channels_[0],
+                                     frame.num_channels(),
+                                     frame.samples_per_channel());
+    gain_applier_.ApplyGain(frame_copy);
+  }
 
   // Remember that the gain has changed for the next iteration.
   last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index 74220fa..8b58ea0 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h

@@ -11,6 +11,8 @@
 #ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
 #define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
 
+#include <vector>
+
 #include "modules/audio_processing/agc2/gain_applier.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
 
@@ -37,15 +39,18 @@
   // frames must be observed in order to consider the sequence as speech.
   // `max_gain_change_db_per_second` limits the adaptation speed (uniformly
   // operated across frames). `max_output_noise_level_dbfs` limits the output
-  // noise level.
+  // noise level. If `dry_run` is true, `Process()` will not modify the audio.
   AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
                              int adjacent_speech_frames_threshold,
                              float max_gain_change_db_per_second,
-                             float max_output_noise_level_dbfs);
+                             float max_output_noise_level_dbfs,
+                             bool dry_run);
   AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
   AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
       delete;
 
+  void Initialize(int sample_rate_hz, int num_channels);
+
   // Analyzes `info`, updates the digital gain and applies it to a 10 ms
   // `frame`. Supports any sample rate supported by APM.
   void Process(const FrameInfo& info, AudioFrameView<float> frame);
@@ -57,10 +62,14 @@
   const int adjacent_speech_frames_threshold_;
   const float max_gain_change_db_per_10ms_;
   const float max_output_noise_level_dbfs_;
+  const bool dry_run_;
 
   int calls_since_last_gain_log_;
   int frames_to_gain_increase_allowed_;
   float last_gain_db_;
+
+  std::vector<std::vector<float>> dry_run_frame_;
+  std::vector<float*> dry_run_channels_;
 };
 
 }  // namespace webrtc

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
index ee9cb02..f4a23a9 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc

@@ -48,7 +48,8 @@
             &apm_data_dumper,
             adjacent_speech_frames_threshold,
             kMaxGainChangePerSecondDb,
-            kMaxOutputNoiseLevelDbfs)) {}
+            kMaxOutputNoiseLevelDbfs,
+            /*dry_run=*/false)) {}
   ApmDataDumper apm_data_dumper;
   std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
 };
@@ -67,6 +68,7 @@
 
 TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
   GainApplierHelper helper;
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
   // Make one call with reasonable audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
   AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
@@ -80,6 +82,7 @@
       static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
 
   GainApplierHelper helper;
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
   AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
   info.speech_level_dbfs = -60.0f;
   float applied_gain;
@@ -94,6 +97,7 @@
 
 TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
   GainApplierHelper helper;
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   // A few extra frames for safety.
@@ -131,6 +135,7 @@
 
 TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
   GainApplierHelper helper;
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
 
@@ -155,6 +160,7 @@
 
 TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
   GainApplierHelper helper;
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   constexpr int num_initial_frames =
@@ -184,6 +190,7 @@
 
 TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
   GainApplierHelper helper;
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
 
   // Make one call with positive audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
@@ -194,6 +201,7 @@
 
 TEST(GainController2GainApplier, AudioLevelLimitsGain) {
   GainApplierHelper helper;
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   constexpr int num_initial_frames =
@@ -231,6 +239,7 @@
        DoNotIncreaseGainWithTooFewSpeechFrames) {
   const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
   GainApplierHelper helper(adjacent_speech_frames_threshold);
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   float prev_gain = 0.0f;
   for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
@@ -248,6 +257,7 @@
 TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
   const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
   GainApplierHelper helper(adjacent_speech_frames_threshold);
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   float prev_gain = 0.0f;
   for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
@@ -269,5 +279,68 @@
                          AdaptiveDigitalGainApplierTest,
                          ::testing::Values(1, 7, 31));
 
+// Checks that the input is never modified when running in dry run mode.
+TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(
+      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
+      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
+  // Simulate an input signal with low speech level.
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.speech_level_dbfs = -60.0f;
+  // Allow enough time to reach the maximum gain.
+  constexpr int kNumFramesToAdapt =
+      static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
+  constexpr float kPcmSamples = 123.456f;
+  // Run the gain applier and check that the PCM samples are not modified.
+  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
+  for (int i = 0; i < kNumFramesToAdapt; ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
+    gain_applier.Process(info, fake_audio.float_frame_view());
+    EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
+  }
+}
+
+// Checks that no sample is modified before and after the sample rate changes.
+TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(
+      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
+      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.speech_level_dbfs = -60.0f;
+  constexpr float kPcmSamples = 123.456f;
+  VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
+  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
+  gain_applier.Process(info, fake_audio_8k.float_frame_view());
+  EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
+  gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono);
+  VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
+  gain_applier.Process(info, fake_audio_48k.float_frame_view());
+  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
+}
+
+// Checks that no sample is modified before and after the number of channels
+// changes.
+TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(
+      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
+      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.speech_level_dbfs = -60.0f;
+  constexpr float kPcmSamples = 123.456f;
+  VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
+  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
+  gain_applier.Process(info, fake_audio_8k.float_frame_view());
+  EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
+  VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
+  gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo);
+  gain_applier.Process(info, fake_audio_48k.float_frame_view());
+  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
+  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
+}
+
 }  // namespace
 }  // namespace webrtc

diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 0f806d3..adb1614 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h

@@ -44,8 +44,6 @@
     1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs;
 
 // Robust VAD probability and speech decisions.
-constexpr int kDefaultVadRnnResetPeriodMs = 1500;
-static_assert(kDefaultVadRnnResetPeriodMs % kFrameDurationMs == 0, "");
 constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
 
 // Saturation Protector settings.

diff --git a/modules/audio_processing/agc2/vad_with_level.cc b/modules/audio_processing/agc2/vad_with_level.cc
index 034f2b6..9747ca2 100644
--- a/modules/audio_processing/agc2/vad_with_level.cc
+++ b/modules/audio_processing/agc2/vad_with_level.cc

@@ -67,10 +67,6 @@
 
 }  // namespace
 
-VadLevelAnalyzer::VadLevelAnalyzer()
-    : VadLevelAnalyzer(kDefaultVadRnnResetPeriodMs, GetAvailableCpuFeatures()) {
-}
-
 VadLevelAnalyzer::VadLevelAnalyzer(int vad_reset_period_ms,
                                    const AvailableCpuFeatures& cpu_features)
     : VadLevelAnalyzer(vad_reset_period_ms,

diff --git a/modules/audio_processing/agc2/vad_with_level.h b/modules/audio_processing/agc2/vad_with_level.h
index 7cd93d6..8d2ae45 100644
--- a/modules/audio_processing/agc2/vad_with_level.h
+++ b/modules/audio_processing/agc2/vad_with_level.h

@@ -37,8 +37,6 @@
     virtual float ComputeProbability(AudioFrameView<const float> frame) = 0;
   };
 
-  // Ctor. Uses the default VAD with the default settings.
-  VadLevelAnalyzer();
   // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
   // `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the
   // duration of two frames. Uses `cpu_features` to instantiate the default VAD.

diff --git a/modules/audio_processing/agc2/vad_with_level_unittest.cc b/modules/audio_processing/agc2/vad_with_level_unittest.cc
index 99b0136..ec8e476 100644
--- a/modules/audio_processing/agc2/vad_with_level_unittest.cc
+++ b/modules/audio_processing/agc2/vad_with_level_unittest.cc

@@ -71,16 +71,16 @@
   const AudioFrameView<const float> view;
 };
 
-TEST(GainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
+TEST(GainController2VadLevelAnalyzer, RmsLessThanPeakLevel) {
+  auto analyzer = CreateVadLevelAnalyzerWithMockVad(
+      /*vad_reset_period_ms=*/1500,
+      /*speech_probabilities=*/{1.0f},
+      /*expected_vad_reset_calls=*/0);
   // Handcrafted frame so that the average is lower than the peak value.
   FrameWithView frame(1000.0f);  // Constant frame.
   frame.samples[10] = 2000.0f;   // Except for one peak value.
-
-  // Compute audio frame levels (the VAD result is ignored).
-  VadLevelAnalyzer analyzer;
-  auto levels_and_vad_prob = analyzer.AnalyzeFrame(frame.view);
-
-  // Compare peak and RMS levels.
+  // Compute audio frame levels.
+  auto levels_and_vad_prob = analyzer->AnalyzeFrame(frame.view);
   EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs);
 }
 

diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 842fd88..225b6b5 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc

@@ -1936,7 +1936,8 @@
       submodules_.gain_controller2.reset(new GainController2());
     }
 
-    submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz());
+    submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz(),
+                                             num_input_channels());
     submodules_.gain_controller2->ApplyConfig(config_.gain_controller2);
   } else {
     submodules_.gain_controller2.reset();

diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index 9e3e8e7..74b63c9 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc

@@ -26,22 +26,26 @@
 GainController2::GainController2()
     : data_dumper_(rtc::AtomicOps::Increment(&instance_count_)),
       gain_applier_(/*hard_clip_samples=*/false,
-                    /*initial_gain_factor=*/0.f),
+                    /*initial_gain_factor=*/0.0f),
       limiter_(static_cast<size_t>(48000), &data_dumper_, "Agc2"),
       calls_since_last_limiter_log_(0) {
   if (config_.adaptive_digital.enabled) {
-    adaptive_agc_ = std::make_unique<AdaptiveAgc>(&data_dumper_);
+    adaptive_agc_ =
+        std::make_unique<AdaptiveAgc>(&data_dumper_, config_.adaptive_digital);
   }
 }
 
 GainController2::~GainController2() = default;
 
-void GainController2::Initialize(int sample_rate_hz) {
+void GainController2::Initialize(int sample_rate_hz, int num_channels) {
   RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
              sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
              sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
              sample_rate_hz == AudioProcessing::kSampleRate48kHz);
   limiter_.SetSampleRate(sample_rate_hz);
+  if (adaptive_agc_) {
+    adaptive_agc_->Initialize(sample_rate_hz, num_channels);
+  }
   data_dumper_.InitiateNewSetOfRecordings();
   data_dumper_.DumpRaw("sample_rate_hz", sample_rate_hz);
   calls_since_last_limiter_log_ = 0;

diff --git a/modules/audio_processing/gain_controller2.h b/modules/audio_processing/gain_controller2.h
index b62890d..ce758c7 100644
--- a/modules/audio_processing/gain_controller2.h
+++ b/modules/audio_processing/gain_controller2.h

@@ -34,7 +34,7 @@
   GainController2& operator=(const GainController2&) = delete;
   ~GainController2();
 
-  void Initialize(int sample_rate_hz);
+  void Initialize(int sample_rate_hz, int num_channels);
   void Process(AudioBuffer* audio);
   void NotifyAnalogLevel(int level);
 

diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc
index 815d58e..85c08bb 100644
--- a/modules/audio_processing/gain_controller2_unittest.cc
+++ b/modules/audio_processing/gain_controller2_unittest.cc

@@ -65,7 +65,7 @@
     size_t sample_rate_hz) {
   auto agc2 = std::make_unique<GainController2>();
   agc2->ApplyConfig(CreateAgc2FixedDigitalModeConfig(fixed_gain_db));
-  agc2->Initialize(sample_rate_hz);
+  agc2->Initialize(sample_rate_hz, /*num_channels=*/1);
   return agc2;
 }
 
@@ -337,9 +337,10 @@
   constexpr float kExpectedGainDb = 4.3f;
   constexpr float kToleranceDb = 0.5f;
   GainController2 gain_controller2;
-  gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz);
+  gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz,
+                              /*num_channels=*/1);
   AudioProcessing::Config::GainController2 config;
-  config.fixed_digital.gain_db = 0.f;
+  config.fixed_digital.gain_db = 0.0f;
   config.adaptive_digital.enabled = true;
   gain_controller2.ApplyConfig(config);
   EXPECT_NEAR(

diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 01bb7c3..8072230 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h

@@ -358,6 +358,7 @@
       } fixed_digital;
       struct AdaptiveDigital {
         bool enabled = false;
+        bool dry_run = false;
         NoiseEstimator noise_estimator = kNoiseFloor;
         int vad_reset_period_ms = 1500;
         int adjacent_speech_frames_threshold = 12;
commit	d66a60597de5db028172803abc273bff5075239e	[log] [tgz]
author	Alessio Bazzica <alessiob@webrtc.org>	Thu Apr 29 16:13:25 2021 +0200
committer	WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com>	Thu Apr 29 16:05:57 2021 +0000
tree	9756c4bdd3c65f6dcb37a50780224b20712187a6
parent	141a4de0727cb2eba7d21e896e4ecf72b5be1502 [diff]