Enable render downmixing to mono in AudioProcessing.

In practice, we have been doing this since time immemorial, but have
relied on the user to do the downmixing (first VoiceEngine, then
Chromium). However, it's more logical for this burden to fall on
AudioProcessing, which can be expected to know that downmixing is a
reasonable approach for AEC. Permitting two render channels results in
running two AECs serially.
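
For illustration, the downmix is just a per-sample average of the two
render channels; in the float path it amounts to something like the
sketch below (variable names are illustrative, not the actual
implementation):

  // Average the two render channels into a single mono channel.
  for (int i = 0; i < samples_per_channel; ++i)
    mono[i] = (left[i] + right[i]) / 2;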

Critically, in my recent change to have Chromium adopt the float
interface:
https://codereview.chromium.org/420603004
I removed the downmixing by Chromium, forgetting that we hadn't yet
enabled this feature in AudioProcessing. This corrects that oversight.

The change to paths hit by production users is very minor. As noted in
the code comments, it required adding downmixing to the int16_t path to
satisfy bit-exactness tests.
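
Schematically, the int16_t path now mirrors the float path by scaling
each sample to float, averaging, and rounding back (ScaleToFloat and
ScaleAndRoundToInt16 are the helpers used in the diff below;
left_sample/right_sample are illustrative names for the interleaved
int16_t input samples):

  // Round-trip through float so the rounding matches the float path
  // bit-exactly.
  float avg = (ScaleToFloat(left_sample) + ScaleToFloat(right_sample)) / 2;
  int16_t downmixed = ScaleAndRoundToInt16(avg);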

For reference, find the ApmTest.Process errors here:
https://paste.googleplex.com/6372007910309888

BUG=webrtc:3853
TESTED=listened to the files output from the Process test, and verified
that they sound as expected: more echo while the AEC is adapting, but
very close afterwards.

R=aluebs@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/31459004

git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@7292 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc
index fb2c200..2bbd771 100644
--- a/modules/audio_processing/audio_buffer.cc
+++ b/modules/audio_processing/audio_buffer.cc
@@ -406,19 +406,32 @@
 // TODO(andrew): Do deinterleaving and mixing in one step?
 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(proc_samples_per_channel_ == input_samples_per_channel_);
-  assert(num_proc_channels_ == num_input_channels_);
-  assert(frame->num_channels_ == num_proc_channels_);
+  assert(frame->num_channels_ == num_input_channels_);
   assert(frame->samples_per_channel_ ==  proc_samples_per_channel_);
   InitForNewData();
   activity_ = frame->vad_activity_;
 
-  int16_t* interleaved = frame->data_;
-  for (int i = 0; i < num_proc_channels_; i++) {
-    int16_t* deinterleaved = channels_->ibuf()->channel(i);
-    int interleaved_idx = i;
-    for (int j = 0; j < proc_samples_per_channel_; j++) {
-      deinterleaved[j] = interleaved[interleaved_idx];
-      interleaved_idx += num_proc_channels_;
+  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
+    // Downmix directly; no explicit deinterleaving needed.
+    int16_t* downmixed = channels_->ibuf()->channel(0);
+    for (int i = 0; i < input_samples_per_channel_; ++i) {
+      // HACK(ajm): The downmixing in the int16_t path is in practice never
+      // called from production code. We do this weird scaling to and from float
+      // to satisfy tests checking for bit-exactness with the float path.
+      float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
+                             ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
+      downmixed[i] = ScaleAndRoundToInt16(downmix_float);
+    }
+  } else {
+    assert(num_proc_channels_ == num_input_channels_);
+    int16_t* interleaved = frame->data_;
+    for (int i = 0; i < num_proc_channels_; ++i) {
+      int16_t* deinterleaved = channels_->ibuf()->channel(i);
+      int interleaved_idx = i;
+      for (int j = 0; j < proc_samples_per_channel_; ++j) {
+        deinterleaved[j] = interleaved[interleaved_idx];
+        interleaved_idx += num_proc_channels_;
+      }
     }
   }
 }
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 49a7d4f..d91cbd2 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -257,10 +257,9 @@
     }
   }
 
-  // TODO(ajm): Enable this.
-  // Always downmix the reverse stream to mono for analysis.
-  //rev_proc_format_.set(rev_proc_rate, 1);
-  rev_proc_format_.set(rev_proc_rate, rev_in_format_.num_channels());
+  // Always downmix the reverse stream to mono for analysis. This has been
+  // demonstrated to work well for AEC in most practical scenarios.
+  rev_proc_format_.set(rev_proc_rate, 1);
 
   if (fwd_proc_format_.rate() == kSampleRate32kHz) {
     split_rate_ = kSampleRate16kHz;
diff --git a/modules/audio_processing/test/audio_processing_unittest.cc b/modules/audio_processing/test/audio_processing_unittest.cc
index 3a35fe5..a0fb303 100644
--- a/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/modules/audio_processing/test/audio_processing_unittest.cc
@@ -751,7 +751,8 @@
   for (int i = 1; i < 3; i++) {
     TestChangingChannels(i, kNoErr);
     EXPECT_EQ(i, apm_->num_input_channels());
-    EXPECT_EQ(i, apm_->num_reverse_channels());
+    // We always force the number of reverse channels used for processing to 1.
+    EXPECT_EQ(1, apm_->num_reverse_channels());
   }
 }
 
@@ -1671,7 +1672,7 @@
     const int num_output_channels = test->num_output_channels();
     const int samples_per_channel = test->sample_rate() *
         AudioProcessing::kChunkSizeMs / 1000;
-    const int output_length =  samples_per_channel * num_output_channels;
+    const int output_length = samples_per_channel * num_output_channels;
 
     Init(test->sample_rate(), test->sample_rate(), test->sample_rate(),
          num_input_channels, num_output_channels, num_render_channels, true);