This is related to an earlier CL that enabled Opus 48 kHz.
https://webrtc-codereview.appspot.com/16619005/

It was reverted due to a build bot error, which this CL is to fix. The problem was that when the audio conference mixer receives audio frames all at 48 kHz and mixes them, it uses the Audio Processing Module (APM) to do post-processing. However, the APM cannot handle 48 kHz input. The current solution is not to allow the mixer to output 48 kHz.

TEST=locally solved https://webrtc-codereview.appspot.com/16619005/

BUG=
R=andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/20779004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6730 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
index bc97ec2..6ef6166 100644
--- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
+++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
@@ -32,10 +32,13 @@
 // stereo at most.
 //
 // TODO(andrew): consider not modifying |frame| here.
-void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame) {
+void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame, bool use_limiter) {
   assert(mixed_frame->num_channels_ >= frame->num_channels_);
-  // Divide by two to avoid saturation in the mixing.
-  *frame >>= 1;
+  if (use_limiter) {
+    // Divide by two to avoid saturation in the mixing.
+    // This is only meaningful if the limiter will be used.
+    *frame >>= 1;
+  }
   if (mixed_frame->num_channels_ > frame->num_channels_) {
     // We only support mono-to-stereo.
     assert(mixed_frame->num_channels_ == 2 &&
@@ -131,6 +134,7 @@
       _participantList(),
       _additionalParticipantList(),
       _numMixedParticipants(0),
+      use_limiter_(true),
       _timeStamp(0),
       _timeScheduler(kProcessPeriodicityInMs),
       _mixedAudioLevel(),
@@ -308,6 +312,11 @@
 
         _timeStamp += _sampleSize;
 
+        // We only use the limiter if it supports the output sample rate and
+        // we're actually mixing multiple streams.
+        use_limiter_ = _numMixedParticipants > 1 &&
+                       _outputFrequency <= kAudioProcMaxNativeSampleRateHz;
+
         MixFromList(*mixedAudio, &mixList);
         MixAnonomouslyFromList(*mixedAudio, &additionalFramesList);
         MixAnonomouslyFromList(*mixedAudio, &rampOutList);
@@ -946,14 +955,6 @@
     if(audioFrameList->empty()) return 0;
 
     uint32_t position = 0;
-    if(_numMixedParticipants == 1) {
-        // No mixing required here; skip the saturation protection.
-        AudioFrame* audioFrame = audioFrameList->front();
-        mixedAudio.CopyFrom(*audioFrame);
-        SetParticipantStatistics(&_scratchMixedParticipants[position],
-                                 *audioFrame);
-        return 0;
-    }
 
     if (_numMixedParticipants == 1) {
       mixedAudio.timestamp_ = audioFrameList->front()->timestamp_;
@@ -979,7 +980,7 @@
             assert(false);
             position = 0;
         }
-        MixFrames(&mixedAudio, (*iter));
+        MixFrames(&mixedAudio, (*iter), use_limiter_);
 
         SetParticipantStatistics(&_scratchMixedParticipants[position],
                                  **iter);
@@ -999,24 +1000,17 @@
 
     if(audioFrameList->empty()) return 0;
 
-    if(_numMixedParticipants == 1) {
-        // No mixing required here; skip the saturation protection.
-        AudioFrame* audioFrame = audioFrameList->front();
-        mixedAudio.CopyFrom(*audioFrame);
-        return 0;
-    }
-
     for (AudioFrameList::const_iterator iter = audioFrameList->begin();
          iter != audioFrameList->end();
          ++iter) {
-        MixFrames(&mixedAudio, *iter);
+        MixFrames(&mixedAudio, *iter, use_limiter_);
     }
     return 0;
 }
 
 bool AudioConferenceMixerImpl::LimitMixedAudio(AudioFrame& mixedAudio) {
-    if(_numMixedParticipants == 1) {
-        return true;
+    if (!use_limiter_) {
+      return true;
     }
 
     // Smoothly limit the mixed frame.
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h
index 31dc71e..44f4ff0 100644
--- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h
+++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h
@@ -192,6 +192,9 @@
     MixerParticipantList _additionalParticipantList;
 
     size_t _numMixedParticipants;
+    // Determines if we will use a limiter for clipping protection during
+    // mixing.
+    bool use_limiter_;
 
     uint32_t _timeStamp;
 
diff --git a/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc b/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc
index eb520b8..2a5732b 100644
--- a/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc
+++ b/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc
@@ -20,8 +20,12 @@
 
 const int16_t kLimiterHeadroom = 29204;  // == -1 dbFS
 const int16_t kInt16Max = 0x7fff;
-const int kSampleRateHz = 16000;
+const int kPayloadType = 105;
+const int kInSampleRateHz = 16000;  // Input file taken as 16 kHz by default.
+const int kRecSampleRateHz = 16000;  // Recorded with 16 kHz L16.
 const int kTestDurationMs = 3000;
+const CodecInst kCodecL16 = {kPayloadType, "L16", 16000, 160, 1, 256000};
+const CodecInst kCodecOpus = {kPayloadType, "opus", 48000, 960, 1, 32000};
 
 }  // namespace
 
@@ -54,7 +58,8 @@
                      bool real_audio,
                      int16_t input_value,
                      int16_t max_output_value,
-                     int16_t min_output_value) {
+                     int16_t min_output_value,
+                     const CodecInst& codec_inst) {
     ASSERT_LE(num_remote_streams_using_mono, num_remote_streams);
 
     if (real_audio) {
@@ -77,7 +82,8 @@
       remote_streams[i] = voe_base_->CreateChannel();
       EXPECT_NE(-1, remote_streams[i]);
     }
-    StartRemoteStreams(remote_streams, num_remote_streams_using_mono);
+    StartRemoteStreams(remote_streams, num_remote_streams_using_mono,
+                       codec_inst);
     TEST_LOG("Playing %d remote streams.\n", num_remote_streams);
 
     // Give it plenty of time to get started.
@@ -106,7 +112,7 @@
   void GenerateInputFile(int16_t input_value) {
     FILE* input_file = fopen(input_filename_.c_str(), "wb");
     ASSERT_TRUE(input_file != NULL);
-    for (int i = 0; i < kSampleRateHz / 1000 * (kTestDurationMs * 2); i++) {
+    for (int i = 0; i < kInSampleRateHz / 1000 * (kTestDurationMs * 2); i++) {
       ASSERT_EQ(1u, fwrite(&input_value, sizeof(input_value), 1, input_file));
     }
     ASSERT_EQ(0, fclose(input_file));
@@ -129,7 +135,7 @@
     // Ensure we've at least recorded half as much file as the duration of the
     // test. We have to use a relaxed tolerance here due to filesystem flakiness
     // on the bots.
-    ASSERT_GE((samples_read * 1000.0) / kSampleRateHz, kTestDurationMs);
+    ASSERT_GE((samples_read * 1000.0) / kRecSampleRateHz, kTestDurationMs);
     // Ensure we read the entire file.
     ASSERT_NE(0, feof(output_file));
     ASSERT_EQ(0, fclose(output_file));
@@ -153,17 +159,8 @@
 
   // Start up remote streams ("normal" participants).
   void StartRemoteStreams(const std::vector<int>& streams,
-                          int num_remote_streams_using_mono) {
-    // Use L16 at 16kHz to minimize distortion (file recording is 16kHz and
-    // resampling will cause distortion).
-    CodecInst codec_inst;
-    strcpy(codec_inst.plname, "L16");
-    codec_inst.channels = 1;
-    codec_inst.plfreq = kSampleRateHz;
-    codec_inst.pltype = 105;
-    codec_inst.pacsize = codec_inst.plfreq / 100;
-    codec_inst.rate = codec_inst.plfreq * sizeof(int16_t) * 8;  // 8 bits/byte.
-
+                          int num_remote_streams_using_mono,
+                          const CodecInst& codec_inst) {
     for (int i = 0; i < num_remote_streams_using_mono; ++i) {
       // Add some delay between starting up the channels in order to give them
       // different energies in the "real audio" test and hopefully exercise
@@ -173,10 +170,11 @@
     }
 
     // The remainder of the streams will use stereo.
-    codec_inst.channels = 2;
-    codec_inst.pltype++;
+    CodecInst codec_inst_stereo = codec_inst;
+    codec_inst_stereo.channels = 2;
+    codec_inst_stereo.pltype++;
     for (size_t i = num_remote_streams_using_mono; i < streams.size(); ++i) {
-      StartRemoteStream(streams[i], codec_inst, 1234 + 2 * i);
+      StartRemoteStream(streams[i], codec_inst_stereo, 1234 + 2 * i);
     }
   }
 
@@ -210,7 +208,7 @@
     EXPECT_NE(-1, size);
     fclose(fid);
     // Divided by 2 due to 2 bytes/sample.
-    return size * 1000 / kSampleRateHz / 2;
+    return size * 1000 / kRecSampleRateHz / 2;
   }
 
   std::string input_filename_;
@@ -222,7 +220,11 @@
 // somewhat more realistic scenario using real audio. It can at least hunt for
 // asserts and crashes.
 TEST_F(MixingTest, MixManyChannelsForStress) {
-  RunMixingTest(10, 0, 10, true, 0, 0, 0);
+  RunMixingTest(10, 0, 10, true, 0, 0, 0, kCodecL16);
+}
+
+TEST_F(MixingTest, MixManyChannelsForStressOpus) {
+  RunMixingTest(10, 0, 10, true, 0, 0, 0, kCodecOpus);
 }
 
 // These tests assume a maximum of three mixed participants. We typically allow
@@ -232,7 +234,7 @@
   const int16_t kInputValue = 1000;
   const int16_t kExpectedOutput = kInputValue * 3;
   RunMixingTest(4, 0, 4, false, kInputValue, 1.1 * kExpectedOutput,
-                0.9 * kExpectedOutput);
+                0.9 * kExpectedOutput, kCodecL16);
 }
 
 // Ensure the mixing saturation protection is working. We can do this because
@@ -245,7 +247,7 @@
   ASSERT_GT(kInputValue * 3, kInt16Max);
   ASSERT_LT(1.1 * kExpectedOutput, kInt16Max);
   RunMixingTest(3, 0, 3, false, kInputValue, 1.1 * kExpectedOutput,
-               0.9 * kExpectedOutput);
+               0.9 * kExpectedOutput, kCodecL16);
 }
 
 TEST_F(MixingTest, SaturationProtectionHasNoEffectOnOneChannel) {
@@ -255,21 +257,21 @@
   ASSERT_GT(0.95 * kExpectedOutput, kLimiterHeadroom);
   // Tighter constraints are required here to properly test this.
   RunMixingTest(1, 0, 1, false, kInputValue, kExpectedOutput,
-                0.95 * kExpectedOutput);
+                0.95 * kExpectedOutput, kCodecL16);
 }
 
 TEST_F(MixingTest, VerifyAnonymousAndNormalParticipantMixing) {
   const int16_t kInputValue = 1000;
   const int16_t kExpectedOutput = kInputValue * 2;
   RunMixingTest(1, 1, 1, false, kInputValue, 1.1 * kExpectedOutput,
-                0.9 * kExpectedOutput);
+                0.9 * kExpectedOutput, kCodecL16);
 }
 
 TEST_F(MixingTest, AnonymousParticipantsAreAlwaysMixed) {
   const int16_t kInputValue = 1000;
   const int16_t kExpectedOutput = kInputValue * 4;
   RunMixingTest(3, 1, 3, false, kInputValue, 1.1 * kExpectedOutput,
-                0.9 * kExpectedOutput);
+                0.9 * kExpectedOutput, kCodecL16);
 }
 
 TEST_F(MixingTest, VerifyStereoAndMonoMixing) {
@@ -277,7 +279,7 @@
   const int16_t kExpectedOutput = kInputValue * 2;
   RunMixingTest(2, 0, 1, false, kInputValue, 1.1 * kExpectedOutput,
                 // Lower than 0.9 due to observed flakiness on bots.
-                0.8 * kExpectedOutput);
+                0.8 * kExpectedOutput, kCodecL16);
 }
 
 }  // namespace webrtc