Revert "Reduce complexity in the APM pipeline when the output is not used"
This reverts commit aa6adffba325f4b698a1e94aeab020bfdc47adec.
Reason for revert: breaks webrtc-importer
Original change's description:
> Reduce complexity in the APM pipeline when the output is not used
>
> This CL selectively turns off parts of the audio processing when
> the output of APM is not used. The parts turned off are those that
> do not need to be trained continuously, but rather can be
> temporarily deactivated.
>
> The purpose of this CL is to allow CPU to be reduced when the
> client is muted.
>
> The CL will be followed by additional CLs, adding similar functionality
> in the echo canceller and the noise suppressor.
>
> Bug: b/177830919
> Change-Id: I72d24505197a53872562c0955f3e7b670c43df6b
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/209703
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#33431}
Bug: b/177830919
Change-Id: I937cd61dedcd43150933eb1b9d65aebe68401e91
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/211348
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33433}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 93dc080..79a3151 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -115,10 +115,6 @@
RTC_CHECK_NOTREACHED();
}
-bool MinimizeProcessingForUnusedOutput() {
- return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
-}
-
// Maximum lengths that frame of samples being passed from the render side to
// the capture side can have (does not apply to AEC3).
static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
@@ -271,9 +267,7 @@
"WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
!field_trial::IsEnabled(
"WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
- EnforceSplitBandHpf(),
- MinimizeProcessingForUnusedOutput()),
- capture_(),
+ EnforceSplitBandHpf()),
capture_nonlocked_() {
RTC_LOG(LS_INFO) << "Injected APM submodules:"
"\nEcho control factory: "
@@ -673,9 +667,7 @@
void AudioProcessingImpl::HandleCaptureOutputUsedSetting(
bool capture_output_used) {
- capture_.capture_output_used =
- capture_output_used || !constants_.minimize_processing_for_unused_output;
-
+ capture_.capture_output_used = capture_output_used;
if (submodules_.agc_manager.get()) {
submodules_.agc_manager->HandleCaptureOutputUsedChange(
capture_.capture_output_used);
@@ -882,7 +874,11 @@
void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
// Fall back to a safe state for the case when a setting for capture output
// usage setting has been missed.
- HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
+ capture_.capture_output_used = true;
+ if (submodules_.echo_controller) {
+ submodules_.echo_controller->SetCaptureOutputUsage(
+ capture_.capture_output_used);
+ }
}
void AudioProcessingImpl::HandleRenderRuntimeSettings() {
@@ -1230,101 +1226,87 @@
capture_buffer, /*stream_has_echo*/ false));
}
- capture_.stats.output_rms_dbfs = absl::nullopt;
- if (capture_.capture_output_used) {
- if (submodule_states_.CaptureMultiBandProcessingPresent() &&
- SampleRateSupportsMultiBand(
- capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
- capture_buffer->MergeFrequencyBands();
- }
-
- if (capture_.capture_fullband_audio) {
- const auto& ec = submodules_.echo_controller;
- bool ec_active = ec ? ec->ActiveProcessing() : false;
- // Only update the fullband buffer if the multiband processing has changed
- // the signal. Keep the original signal otherwise.
- if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
- capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
- }
- capture_buffer = capture_.capture_fullband_audio.get();
- }
-
- if (config_.residual_echo_detector.enabled) {
- RTC_DCHECK(submodules_.echo_detector);
- submodules_.echo_detector->AnalyzeCaptureAudio(
- rtc::ArrayView<const float>(capture_buffer->channels()[0],
- capture_buffer->num_frames()));
- }
-
- // TODO(aluebs): Investigate if the transient suppression placement should
- // be before or after the AGC.
- if (submodules_.transient_suppressor) {
- float voice_probability =
- submodules_.agc_manager.get()
- ? submodules_.agc_manager->voice_probability()
- : 1.f;
-
- submodules_.transient_suppressor->Suppress(
- capture_buffer->channels()[0], capture_buffer->num_frames(),
- capture_buffer->num_channels(),
- capture_buffer->split_bands_const(0)[kBand0To8kHz],
- capture_buffer->num_frames_per_band(),
- capture_.keyboard_info.keyboard_data,
- capture_.keyboard_info.num_keyboard_frames, voice_probability,
- capture_.key_pressed);
- }
-
- // Experimental APM sub-module that analyzes |capture_buffer|.
- if (submodules_.capture_analyzer) {
- submodules_.capture_analyzer->Analyze(capture_buffer);
- }
-
- if (submodules_.gain_controller2) {
- submodules_.gain_controller2->NotifyAnalogLevel(
- recommended_stream_analog_level_locked());
- submodules_.gain_controller2->Process(capture_buffer);
- }
-
- if (submodules_.capture_post_processor) {
- submodules_.capture_post_processor->Process(capture_buffer);
- }
-
- // The level estimator operates on the recombined data.
- if (config_.level_estimation.enabled) {
- submodules_.output_level_estimator->ProcessStream(*capture_buffer);
- capture_.stats.output_rms_dbfs =
- submodules_.output_level_estimator->RMS();
- }
-
- capture_output_rms_.Analyze(rtc::ArrayView<const float>(
- capture_buffer->channels_const()[0],
- capture_nonlocked_.capture_processing_format.num_frames()));
- if (log_rms) {
- RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
- RTC_HISTOGRAM_COUNTS_LINEAR(
- "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
- RmsLevel::kMinLevelDb, 64);
- RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
- levels.peak, 1, RmsLevel::kMinLevelDb, 64);
- }
-
- if (submodules_.agc_manager) {
- int level = recommended_stream_analog_level_locked();
- data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
- &level);
- }
-
- // Compute echo-detector stats.
- if (config_.residual_echo_detector.enabled) {
- RTC_DCHECK(submodules_.echo_detector);
- auto ed_metrics = submodules_.echo_detector->GetMetrics();
- capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
- capture_.stats.residual_echo_likelihood_recent_max =
- ed_metrics.echo_likelihood_recent_max;
- }
+ if (submodule_states_.CaptureMultiBandProcessingPresent() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
+ capture_buffer->MergeFrequencyBands();
}
- // Compute echo-controller stats.
+ if (capture_.capture_fullband_audio) {
+ const auto& ec = submodules_.echo_controller;
+ bool ec_active = ec ? ec->ActiveProcessing() : false;
+ // Only update the fullband buffer if the multiband processing has changed
+ // the signal. Keep the original signal otherwise.
+ if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
+ capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
+ }
+ capture_buffer = capture_.capture_fullband_audio.get();
+ }
+
+ if (config_.residual_echo_detector.enabled) {
+ RTC_DCHECK(submodules_.echo_detector);
+ submodules_.echo_detector->AnalyzeCaptureAudio(rtc::ArrayView<const float>(
+ capture_buffer->channels()[0], capture_buffer->num_frames()));
+ }
+
+ // TODO(aluebs): Investigate if the transient suppression placement should be
+ // before or after the AGC.
+ if (submodules_.transient_suppressor) {
+ float voice_probability = submodules_.agc_manager.get()
+ ? submodules_.agc_manager->voice_probability()
+ : 1.f;
+
+ submodules_.transient_suppressor->Suppress(
+ capture_buffer->channels()[0], capture_buffer->num_frames(),
+ capture_buffer->num_channels(),
+ capture_buffer->split_bands_const(0)[kBand0To8kHz],
+ capture_buffer->num_frames_per_band(),
+ capture_.keyboard_info.keyboard_data,
+ capture_.keyboard_info.num_keyboard_frames, voice_probability,
+ capture_.key_pressed);
+ }
+
+ // Experimental APM sub-module that analyzes |capture_buffer|.
+ if (submodules_.capture_analyzer) {
+ submodules_.capture_analyzer->Analyze(capture_buffer);
+ }
+
+ if (submodules_.gain_controller2) {
+ submodules_.gain_controller2->NotifyAnalogLevel(
+ recommended_stream_analog_level_locked());
+ submodules_.gain_controller2->Process(capture_buffer);
+ }
+
+ if (submodules_.capture_post_processor) {
+ submodules_.capture_post_processor->Process(capture_buffer);
+ }
+
+ // The level estimator operates on the recombined data.
+ if (config_.level_estimation.enabled) {
+ submodules_.output_level_estimator->ProcessStream(*capture_buffer);
+ capture_.stats.output_rms_dbfs = submodules_.output_level_estimator->RMS();
+ } else {
+ capture_.stats.output_rms_dbfs = absl::nullopt;
+ }
+
+ capture_output_rms_.Analyze(rtc::ArrayView<const float>(
+ capture_buffer->channels_const()[0],
+ capture_nonlocked_.capture_processing_format.num_frames()));
+ if (log_rms) {
+ RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms",
+ levels.average, 1, RmsLevel::kMinLevelDb, 64);
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
+ levels.peak, 1, RmsLevel::kMinLevelDb, 64);
+ }
+
+ if (submodules_.agc_manager) {
+ int level = recommended_stream_analog_level_locked();
+ data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
+ &level);
+ }
+
+ // Compute echo-related stats.
if (submodules_.echo_controller) {
auto ec_metrics = submodules_.echo_controller->GetMetrics();
capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
@@ -1332,6 +1314,13 @@
ec_metrics.echo_return_loss_enhancement;
capture_.stats.delay_ms = ec_metrics.delay_ms;
}
+ if (config_.residual_echo_detector.enabled) {
+ RTC_DCHECK(submodules_.echo_detector);
+ auto ed_metrics = submodules_.echo_detector->GetMetrics();
+ capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
+ capture_.stats.residual_echo_likelihood_recent_max =
+ ed_metrics.echo_likelihood_recent_max;
+ }
// Pass stats for reporting.
stats_reporter_.UpdateStatistics(capture_.stats);
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index c4bbf11..8306ac7 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -419,17 +419,13 @@
const struct ApmConstants {
ApmConstants(bool multi_channel_render_support,
bool multi_channel_capture_support,
- bool enforce_split_band_hpf,
- bool minimize_processing_for_unused_output)
+ bool enforce_split_band_hpf)
: multi_channel_render_support(multi_channel_render_support),
multi_channel_capture_support(multi_channel_capture_support),
- enforce_split_band_hpf(enforce_split_band_hpf),
- minimize_processing_for_unused_output(
- minimize_processing_for_unused_output) {}
+ enforce_split_band_hpf(enforce_split_band_hpf) {}
bool multi_channel_render_support;
bool multi_channel_capture_support;
bool enforce_split_band_hpf;
- bool minimize_processing_for_unused_output;
} constants_;
struct ApmCaptureState {