Adding a new constraint to set NetEq buffer capacity from peerconnection

This change makes it possible to set a custom value for the maximum
capacity of the packet buffer in NetEq (the audio jitter buffer). The
default value is 50 packets, but any value can be set with the new
functionality.

R=jmarusic@webrtc.org, mflodman@webrtc.org, pthatcher@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/50869004

Cr-Commit-Position: refs/heads/master@{#9159}
diff --git a/talk/app/webrtc/java/jni/peerconnection_jni.cc b/talk/app/webrtc/java/jni/peerconnection_jni.cc
index 460ad7e..f8f363a 100644
--- a/talk/app/webrtc/java/jni/peerconnection_jni.cc
+++ b/talk/app/webrtc/java/jni/peerconnection_jni.cc
@@ -1304,6 +1304,9 @@
       "Ljava/util/List;");
   jobject j_ice_servers = GetObjectField(jni, j_rtc_config, j_ice_servers_id);
 
+  jfieldID j_audio_jitter_buffer_max_packets_id = GetFieldID(
+      jni, j_rtc_config_class, "audioJitterBufferMaxPackets",
+      "I");
   PeerConnectionInterface::RTCConfiguration rtc_config;
 
   rtc_config.type =
@@ -1312,6 +1315,8 @@
   rtc_config.tcp_candidate_policy =
       JavaTcpCandidatePolicyToNativeType(jni, j_tcp_candidate_policy);
   JavaIceServersToJsepIceServers(jni, j_ice_servers, &rtc_config.servers);
+  rtc_config.audio_jitter_buffer_max_packets =
+      GetIntField(jni, j_rtc_config, j_audio_jitter_buffer_max_packets_id);
 
   PCOJava* observer = reinterpret_cast<PCOJava*>(observer_p);
   observer->SetConstraints(new ConstraintsWrapper(jni, j_constraints));
diff --git a/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java b/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java
index 8fcc975..80e7bfe 100644
--- a/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java
+++ b/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java
@@ -128,12 +128,14 @@
     public List<IceServer> iceServers;
     public BundlePolicy bundlePolicy;
     public TcpCandidatePolicy tcpCandidatePolicy;
+    public int audioJitterBufferMaxPackets;
 
     public RTCConfiguration(List<IceServer> iceServers) {
       iceTransportsType = IceTransportsType.ALL;
       bundlePolicy = BundlePolicy.BALANCED;
       tcpCandidatePolicy = TcpCandidatePolicy.ENABLED;
       this.iceServers = iceServers;
+      audioJitterBufferMaxPackets = 50;
     }
   };
 
diff --git a/talk/app/webrtc/peerconnection.cc b/talk/app/webrtc/peerconnection.cc
index 39284c1..5226041 100644
--- a/talk/app/webrtc/peerconnection.cc
+++ b/talk/app/webrtc/peerconnection.cc
@@ -382,9 +382,7 @@
 
   // Initialize the WebRtcSession. It creates transport channels etc.
   if (!session_->Initialize(factory_->options(), constraints,
-                            dtls_identity_service,
-                            configuration.type,
-                            configuration.bundle_policy))
+                            dtls_identity_service, configuration))
     return false;
 
   // Register PeerConnection as receiver of local ice candidates.
diff --git a/talk/app/webrtc/peerconnectioninterface.h b/talk/app/webrtc/peerconnectioninterface.h
index ed4f5b3..e32676e 100644
--- a/talk/app/webrtc/peerconnectioninterface.h
+++ b/talk/app/webrtc/peerconnectioninterface.h
@@ -211,11 +211,13 @@
     IceServers servers;
     BundlePolicy bundle_policy;
     TcpCandidatePolicy tcp_candidate_policy;
+    int audio_jitter_buffer_max_packets;
 
     RTCConfiguration()
         : type(kAll),
           bundle_policy(kBundlePolicyBalanced),
-          tcp_candidate_policy(kTcpCandidatePolicyEnabled) {}
+          tcp_candidate_policy(kTcpCandidatePolicyEnabled),
+          audio_jitter_buffer_max_packets(50) {}
   };
 
   struct RTCOfferAnswerOptions {
diff --git a/talk/app/webrtc/webrtcsession.cc b/talk/app/webrtc/webrtcsession.cc
index dd5d858..e2a9d60 100644
--- a/talk/app/webrtc/webrtcsession.cc
+++ b/talk/app/webrtc/webrtcsession.cc
@@ -521,9 +521,8 @@
     const PeerConnectionFactoryInterface::Options& options,
     const MediaConstraintsInterface*  constraints,
     DTLSIdentityServiceInterface* dtls_identity_service,
-    PeerConnectionInterface::IceTransportsType ice_transport_type,
-    PeerConnectionInterface::BundlePolicy bundle_policy) {
-  bundle_policy_ = bundle_policy;
+    const PeerConnectionInterface::RTCConfiguration& rtc_configuration) {
+  bundle_policy_ = rtc_configuration.bundle_policy;
 
   // TODO(perkj): Take |constraints| into consideration. Return false if not all
   // mandatory constraints can be fulfilled. Note that |constraints|
@@ -640,6 +639,9 @@
       MediaConstraintsInterface::kCombinedAudioVideoBwe,
       &audio_options_.combined_audio_video_bwe);
 
+  audio_options_.audio_jitter_buffer_max_packets.Set(
+      rtc_configuration.audio_jitter_buffer_max_packets);
+
   const cricket::VideoCodec default_codec(
       JsepSessionDescription::kDefaultVideoCodecId,
       JsepSessionDescription::kDefaultVideoCodecName,
@@ -667,7 +669,7 @@
     webrtc_session_desc_factory_->SetSdesPolicy(cricket::SEC_DISABLED);
   }
   port_allocator()->set_candidate_filter(
-      ConvertIceTransportTypeToCandidateFilter(ice_transport_type));
+      ConvertIceTransportTypeToCandidateFilter(rtc_configuration.type));
   return true;
 }
 
diff --git a/talk/app/webrtc/webrtcsession.h b/talk/app/webrtc/webrtcsession.h
index ce0fb0c..aa1deb5 100644
--- a/talk/app/webrtc/webrtcsession.h
+++ b/talk/app/webrtc/webrtcsession.h
@@ -117,11 +117,11 @@
                 MediaStreamSignaling* mediastream_signaling);
   virtual ~WebRtcSession();
 
-  bool Initialize(const PeerConnectionFactoryInterface::Options& options,
-                  const MediaConstraintsInterface* constraints,
-                  DTLSIdentityServiceInterface* dtls_identity_service,
-                  PeerConnectionInterface::IceTransportsType ice_transport_type,
-                  PeerConnectionInterface::BundlePolicy bundle_policy);
+  bool Initialize(
+      const PeerConnectionFactoryInterface::Options& options,
+      const MediaConstraintsInterface* constraints,
+      DTLSIdentityServiceInterface* dtls_identity_service,
+      const PeerConnectionInterface::RTCConfiguration& rtc_configuration);
   // Deletes the voice, video and data channel and changes the session state
   // to STATE_RECEIVEDTERMINATE.
   void Terminate();
diff --git a/talk/app/webrtc/webrtcsession_unittest.cc b/talk/app/webrtc/webrtcsession_unittest.cc
index 3efc112..e4f39f8 100644
--- a/talk/app/webrtc/webrtcsession_unittest.cc
+++ b/talk/app/webrtc/webrtcsession_unittest.cc
@@ -156,6 +156,8 @@
     "a=rtpmap:96 rtx/90000\r\n"
     "a=fmtp:96 apt=0\r\n";
 
+static const int kAudioJitterBufferMaxPackets = 50;
+
 // Add some extra |newlines| to the |message| after |line|.
 static void InjectAfter(const std::string& line,
                         const std::string& newlines,
@@ -383,8 +385,7 @@
 
   void Init(
       DTLSIdentityServiceInterface* identity_service,
-      PeerConnectionInterface::IceTransportsType ice_transport_type,
-      PeerConnectionInterface::BundlePolicy bundle_policy) {
+      const PeerConnectionInterface::RTCConfiguration& rtc_configuration) {
     ASSERT_TRUE(session_.get() == NULL);
     session_.reset(new WebRtcSessionForTest(
         channel_manager_.get(), rtc::Thread::Current(),
@@ -398,33 +399,51 @@
         observer_.ice_gathering_state_);
 
     EXPECT_TRUE(session_->Initialize(options_, constraints_.get(),
-                                     identity_service, ice_transport_type,
-                                     bundle_policy));
+                                     identity_service, rtc_configuration));
     session_->set_metrics_observer(&metrics_observer_);
   }
 
   void Init() {
-    Init(NULL, PeerConnectionInterface::kAll,
-         PeerConnectionInterface::kBundlePolicyBalanced);
+    PeerConnectionInterface::RTCConfiguration configuration;
+    configuration.type = PeerConnectionInterface::kAll;
+    configuration.bundle_policy =
+        PeerConnectionInterface::kBundlePolicyBalanced;
+    configuration.audio_jitter_buffer_max_packets =
+        kAudioJitterBufferMaxPackets;
+    Init(NULL, configuration);
   }
 
   void InitWithIceTransport(
       PeerConnectionInterface::IceTransportsType ice_transport_type) {
-    Init(NULL, ice_transport_type,
-         PeerConnectionInterface::kBundlePolicyBalanced);
+    PeerConnectionInterface::RTCConfiguration configuration;
+    configuration.type = ice_transport_type;
+    configuration.bundle_policy =
+        PeerConnectionInterface::kBundlePolicyBalanced;
+    configuration.audio_jitter_buffer_max_packets =
+        kAudioJitterBufferMaxPackets;
+    Init(NULL, configuration);
   }
 
   void InitWithBundlePolicy(
       PeerConnectionInterface::BundlePolicy bundle_policy) {
-    Init(NULL, PeerConnectionInterface::kAll, bundle_policy);
+    PeerConnectionInterface::RTCConfiguration configuration;
+    configuration.type = PeerConnectionInterface::kAll;
+    configuration.bundle_policy = bundle_policy;
+    configuration.audio_jitter_buffer_max_packets =
+        kAudioJitterBufferMaxPackets;
+    Init(NULL, configuration);
   }
 
   void InitWithDtls(bool identity_request_should_fail = false) {
     FakeIdentityService* identity_service = new FakeIdentityService();
     identity_service->set_should_fail(identity_request_should_fail);
-    Init(identity_service,
-         PeerConnectionInterface::kAll,
-         PeerConnectionInterface::kBundlePolicyBalanced);
+    PeerConnectionInterface::RTCConfiguration configuration;
+    configuration.type = PeerConnectionInterface::kAll;
+    configuration.bundle_policy =
+        PeerConnectionInterface::kBundlePolicyBalanced;
+    configuration.audio_jitter_buffer_max_packets =
+        kAudioJitterBufferMaxPackets;
+    Init(identity_service, configuration);
   }
 
   void InitWithDtmfCodec() {
diff --git a/talk/media/base/mediachannel.h b/talk/media/base/mediachannel.h
index dd78f0e..d77ddbb 100644
--- a/talk/media/base/mediachannel.h
+++ b/talk/media/base/mediachannel.h
@@ -150,6 +150,8 @@
     noise_suppression.SetFrom(change.noise_suppression);
     highpass_filter.SetFrom(change.highpass_filter);
     stereo_swapping.SetFrom(change.stereo_swapping);
+    audio_jitter_buffer_max_packets.SetFrom(
+        change.audio_jitter_buffer_max_packets);
     typing_detection.SetFrom(change.typing_detection);
     aecm_generate_comfort_noise.SetFrom(change.aecm_generate_comfort_noise);
     conference_mode.SetFrom(change.conference_mode);
@@ -180,6 +182,7 @@
         noise_suppression == o.noise_suppression &&
         highpass_filter == o.highpass_filter &&
         stereo_swapping == o.stereo_swapping &&
+        audio_jitter_buffer_max_packets == o.audio_jitter_buffer_max_packets &&
         typing_detection == o.typing_detection &&
         aecm_generate_comfort_noise == o.aecm_generate_comfort_noise &&
         conference_mode == o.conference_mode &&
@@ -210,6 +213,8 @@
     ost << ToStringIfSet("ns", noise_suppression);
     ost << ToStringIfSet("hf", highpass_filter);
     ost << ToStringIfSet("swap", stereo_swapping);
+    ost << ToStringIfSet("audio_jitter_buffer_max_packets",
+                         audio_jitter_buffer_max_packets);
     ost << ToStringIfSet("typing", typing_detection);
     ost << ToStringIfSet("comfort_noise", aecm_generate_comfort_noise);
     ost << ToStringIfSet("conference", conference_mode);
@@ -248,6 +253,8 @@
   Settable<bool> highpass_filter;
   // Audio processing to swap the left and right channels.
   Settable<bool> stereo_swapping;
+  // Audio receiver jitter buffer (NetEq) max capacity in number of packets.
+  Settable<int> audio_jitter_buffer_max_packets;
   // Audio processing to detect typing.
   Settable<bool> typing_detection;
   Settable<bool> aecm_generate_comfort_noise;
diff --git a/talk/media/webrtc/fakewebrtcvoiceengine.h b/talk/media/webrtc/fakewebrtcvoiceengine.h
index 24ef846..5b86b53 100644
--- a/talk/media/webrtc/fakewebrtcvoiceengine.h
+++ b/talk/media/webrtc/fakewebrtcvoiceengine.h
@@ -41,6 +41,7 @@
 #include "webrtc/base/checks.h"
 #include "webrtc/base/gunit.h"
 #include "webrtc/base/stringutils.h"
+#include "webrtc/config.h"
 #include "webrtc/modules/audio_processing/include/audio_processing.h"
 
 namespace cricket {
@@ -213,7 +214,8 @@
           send_audio_level_ext_(-1),
           receive_audio_level_ext_(-1),
           send_absolute_sender_time_ext_(-1),
-          receive_absolute_sender_time_ext_(-1) {
+          receive_absolute_sender_time_ext_(-1),
+          neteq_capacity(-1) {
       memset(&send_codec, 0, sizeof(send_codec));
       memset(&rx_agc_config, 0, sizeof(rx_agc_config));
     }
@@ -249,6 +251,7 @@
     webrtc::CodecInst send_codec;
     webrtc::PacketTime last_rtp_packet_time;
     std::list<std::string> packets;
+    int neteq_capacity;
   };
 
   FakeWebRtcVoiceEngine(const cricket::AudioCodec* const* codecs,
@@ -391,7 +394,7 @@
                                 true);
     }
   }
-  int AddChannel() {
+  int AddChannel(const webrtc::Config& config) {
     if (fail_create_channel_) {
       return -1;
     }
@@ -401,6 +404,9 @@
       GetCodec(i, codec);
       ch->recv_codecs.push_back(codec);
     }
+    if (config.Get<webrtc::NetEqCapacityConfig>().enabled) {
+      ch->neteq_capacity = config.Get<webrtc::NetEqCapacityConfig>().capacity;
+    }
     channels_[++last_channel_] = ch;
     return last_channel_;
   }
@@ -447,10 +453,11 @@
     return &audio_processing_;
   }
   WEBRTC_FUNC(CreateChannel, ()) {
-    return AddChannel();
+    webrtc::Config empty_config;
+    return AddChannel(empty_config);
   }
-  WEBRTC_FUNC(CreateChannel, (const webrtc::Config& /*config*/)) {
-    return AddChannel();
+  WEBRTC_FUNC(CreateChannel, (const webrtc::Config& config)) {
+    return AddChannel(config);
   }
   WEBRTC_FUNC(DeleteChannel, (int channel)) {
     WEBRTC_CHECK_CHANNEL(channel);
@@ -1243,6 +1250,11 @@
   WEBRTC_STUB(GetAudioFrame, (int channel, int desired_sample_rate_hz,
                               webrtc::AudioFrame* frame));
   WEBRTC_STUB(SetExternalMixing, (int channel, bool enable));
+  int GetNetEqCapacity() const {
+    auto ch = channels_.find(last_channel_);
+    ASSERT(ch != channels_.end());
+    return ch->second->neteq_capacity;
+  }
 
  private:
   int GetNumDevices(int& num) {
diff --git a/talk/media/webrtc/webrtcvoiceengine.cc b/talk/media/webrtc/webrtcvoiceengine.cc
index baae3de..fcf8ef7 100644
--- a/talk/media/webrtc/webrtcvoiceengine.cc
+++ b/talk/media/webrtc/webrtcvoiceengine.cc
@@ -353,6 +353,7 @@
   options.noise_suppression.Set(true);
   options.highpass_filter.Set(true);
   options.stereo_swapping.Set(false);
+  options.audio_jitter_buffer_max_packets.Set(50);
   options.typing_detection.Set(true);
   options.conference_mode.Set(false);
   options.adjust_agc_delta.Set(0);
@@ -955,6 +956,14 @@
     }
   }
 
+  int audio_jitter_buffer_max_packets;
+  if (options.audio_jitter_buffer_max_packets.Get(
+          &audio_jitter_buffer_max_packets)) {
+    LOG(LS_INFO) << "NetEq capacity is " << audio_jitter_buffer_max_packets;
+    voe_config_.Set<webrtc::NetEqCapacityConfig>(
+        new webrtc::NetEqCapacityConfig(audio_jitter_buffer_max_packets));
+  }
+
   bool typing_detection;
   if (options.typing_detection.Get(&typing_detection)) {
     LOG(LS_INFO) << "Typing detection is enabled? " << typing_detection;
diff --git a/talk/media/webrtc/webrtcvoiceengine.h b/talk/media/webrtc/webrtcvoiceengine.h
index 4596c4d..242467d 100644
--- a/talk/media/webrtc/webrtcvoiceengine.h
+++ b/talk/media/webrtc/webrtcvoiceengine.h
@@ -46,6 +46,7 @@
 #include "webrtc/base/thread_checker.h"
 #include "webrtc/call.h"
 #include "webrtc/common.h"
+#include "webrtc/config.h"
 
 #if !defined(LIBPEERCONNECTION_LIB) && \
     !defined(LIBPEERCONNECTION_IMPLEMENTATION)
diff --git a/talk/media/webrtc/webrtcvoiceengine_unittest.cc b/talk/media/webrtc/webrtcvoiceengine_unittest.cc
index 7737f34..f995093 100644
--- a/talk/media/webrtc/webrtcvoiceengine_unittest.cc
+++ b/talk/media/webrtc/webrtcvoiceengine_unittest.cc
@@ -2882,6 +2882,7 @@
   EXPECT_TRUE(typing_detection_enabled);
   EXPECT_EQ(ec_mode, webrtc::kEcConference);
   EXPECT_EQ(ns_mode, webrtc::kNsHighSuppression);
+  EXPECT_EQ(50, voe_.GetNetEqCapacity());  // From GetDefaultEngineOptions().
 
   // Turn echo cancellation off
   options.echo_cancellation.Set(false);
diff --git a/webrtc/config.h b/webrtc/config.h
index 4e2faa3..09633ed 100644
--- a/webrtc/config.h
+++ b/webrtc/config.h
@@ -113,6 +113,20 @@
   int min_transmit_bitrate_bps;
 };
 
+// Controls the capacity of the packet buffer in NetEq. The capacity is the
+// maximum number of packets that the buffer can contain. If the limit is
+// exceeded, the buffer will be flushed. The capacity does not affect the actual
+// audio delay in the general case, since this is governed by the target buffer
+// level (calculated from the jitter profile). It is only in the rare case of
+// severe network freezes that a higher capacity will lead to a (transient)
+// increase in audio delay.
+struct NetEqCapacityConfig {
+  NetEqCapacityConfig() : enabled(false), capacity(0) {}
+  explicit NetEqCapacityConfig(int value) : enabled(true), capacity(value) {}
+  bool enabled;
+  int capacity;
+};
+
 }  // namespace webrtc
 
 #endif  // WEBRTC_CONFIG_H_
diff --git a/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc b/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc
index fa63491..51b9a78 100644
--- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc
+++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc
@@ -10,6 +10,7 @@
 
 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
 
+#include "webrtc/base/checks.h"
 #include "webrtc/common_types.h"
 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
 #include "webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.h"
@@ -20,13 +21,20 @@
 
 // Create module
 AudioCodingModule* AudioCodingModule::Create(int id) {
-  return Create(id, Clock::GetRealTimeClock());
+  Config config;
+  config.id = id;
+  config.clock = Clock::GetRealTimeClock();
+  return Create(config);
 }
 
 AudioCodingModule* AudioCodingModule::Create(int id, Clock* clock) {
-  AudioCodingModule::Config config;
+  Config config;
   config.id = id;
   config.clock = clock;
+  return Create(config);
+}
+
+AudioCodingModule* AudioCodingModule::Create(const Config& config) {
   return new acm2::AudioCodingModuleImpl(config);
 }
 
diff --git a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
index 4d4a6f3..4660589 100644
--- a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
+++ b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
@@ -99,6 +99,7 @@
   //
   static AudioCodingModule* Create(int id);
   static AudioCodingModule* Create(int id, Clock* clock);
+  static AudioCodingModule* Create(const Config& config);
   virtual ~AudioCodingModule() {};
 
   ///////////////////////////////////////////////////////////////////////////
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 40a974b..c477538 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -10,9 +10,11 @@
 
 #include "webrtc/voice_engine/channel.h"
 
+#include "webrtc/base/checks.h"
 #include "webrtc/base/format_macros.h"
 #include "webrtc/base/timeutils.h"
 #include "webrtc/common.h"
+#include "webrtc/config.h"
 #include "webrtc/modules/audio_device/include/audio_device.h"
 #include "webrtc/modules/audio_processing/include/audio_processing.h"
 #include "webrtc/modules/interface/module_common_types.h"
@@ -757,8 +759,6 @@
         VoEModuleId(instanceId, channelId), Clock::GetRealTimeClock(), this,
         this, this, rtp_payload_registry_.get())),
     telephone_event_handler_(rtp_receiver_->GetTelephoneEventHandler()),
-    audio_coding_(AudioCodingModule::Create(
-        VoEModuleId(instanceId, channelId))),
     _rtpDumpIn(*RtpDump::CreateRtpDump()),
     _rtpDumpOut(*RtpDump::CreateRtpDump()),
     _outputAudioLevel(),
@@ -828,6 +828,15 @@
 {
     WEBRTC_TRACE(kTraceMemory, kTraceVoice, VoEId(_instanceId,_channelId),
                  "Channel::Channel() - ctor");
+    AudioCodingModule::Config acm_config;
+    acm_config.id = VoEModuleId(instanceId, channelId);
+    if (config.Get<NetEqCapacityConfig>().enabled) {
+      acm_config.neteq_config.max_packets_in_buffer =
+          config.Get<NetEqCapacityConfig>().capacity;
+      CHECK_GT(acm_config.neteq_config.max_packets_in_buffer, 0);
+    }
+    audio_coding_.reset(AudioCodingModule::Create(acm_config));
+
     _inbandDtmfQueue.ResetDtmf();
     _inbandDtmfGenerator.Init();
     _outputAudioLevel.Clear();