Implement RTCInboundRTPStreamStats.JitterBufferMinimumDelay

This metric was recently added to the standard (see https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-jitterbufferminimumdelay). This CL implements it for audio streams.

Bug: webrtc:14141
Change-Id: I79d918639cd12361ebbc28c2be41549e33fa7e2a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/262770
Reviewed-by: Tomas Gunnarsson <tommi@webrtc.org>
Reviewed-by: Jakob Ivarsson‎ <jakobi@webrtc.org>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#37567}
diff --git a/api/neteq/neteq.h b/api/neteq/neteq.h
index 4c9c0b6..ffc3958 100644
--- a/api/neteq/neteq.h
+++ b/api/neteq/neteq.h
@@ -67,6 +67,7 @@
   uint64_t jitter_buffer_delay_ms = 0;
   uint64_t jitter_buffer_emitted_count = 0;
   uint64_t jitter_buffer_target_delay_ms = 0;
+  uint64_t jitter_buffer_minimum_delay_ms = 0;
   uint64_t inserted_samples_for_deceleration = 0;
   uint64_t removed_samples_for_acceleration = 0;
   uint64_t silent_concealed_samples = 0;
diff --git a/api/neteq/neteq_controller.h b/api/neteq/neteq_controller.h
index 2f203f4..f0101d3 100644
--- a/api/neteq/neteq_controller.h
+++ b/api/neteq/neteq_controller.h
@@ -163,6 +163,12 @@
   // Returns the target buffer level in ms.
   virtual int TargetLevelMs() const = 0;
 
+  // Returns the target buffer level in ms as it would be if no minimum or
+  // maximum delay was set.
+  // TODO(bugs.webrtc.org/14270): Make pure virtual once all implementations are
+  // updated.
+  virtual int UnlimitedTargetLevelMs() const { return 0; }
+
   // Notify the NetEqController that a packet has arrived. Returns the relative
   // arrival delay, if it can be computed.
   virtual absl::optional<int> PacketArrived(int fs_hz,
@@ -170,7 +176,7 @@
                                             const PacketArrivedInfo& info) = 0;
 
   // Notify the NetEqController that we are currently in muted state.
-  // TODO(ivoc): Make pure virtual when downstream is updated.
+  // TODO(bugs.webrtc.org/14270): Make pure virtual when downstream is updated.
   virtual void NotifyMutedState() {}
 
   // Returns true if a peak was found.
diff --git a/api/stats/rtcstats_objects.h b/api/stats/rtcstats_objects.h
index 9a5d0de..d0a6207 100644
--- a/api/stats/rtcstats_objects.h
+++ b/api/stats/rtcstats_objects.h
@@ -453,6 +453,7 @@
   RTCStatsMember<double> last_packet_received_timestamp;
   RTCStatsMember<double> jitter_buffer_delay;
   RTCStatsMember<double> jitter_buffer_target_delay;
+  RTCStatsMember<double> jitter_buffer_minimum_delay;
   RTCStatsMember<uint64_t> jitter_buffer_emitted_count;
   RTCStatsMember<uint64_t> total_samples_received;
   RTCStatsMember<uint64_t> concealed_samples;
diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc
index 06dad42..168d214 100644
--- a/audio/audio_receive_stream.cc
+++ b/audio/audio_receive_stream.cc
@@ -339,6 +339,9 @@
   stats.jitter_buffer_target_delay_seconds =
       static_cast<double>(ns.jitterBufferTargetDelayMs) /
       static_cast<double>(rtc::kNumMillisecsPerSec);
+  stats.jitter_buffer_minimum_delay_seconds =
+      static_cast<double>(ns.jitterBufferMinimumDelayMs) /
+      static_cast<double>(rtc::kNumMillisecsPerSec);
   stats.inserted_samples_for_deceleration = ns.insertedSamplesForDeceleration;
   stats.removed_samples_for_acceleration = ns.removedSamplesForAcceleration;
   stats.expand_rate = Q14ToFloat(ns.currentExpandRate);
diff --git a/audio/audio_receive_stream_unittest.cc b/audio/audio_receive_stream_unittest.cc
index a581bbb..75129ac 100644
--- a/audio/audio_receive_stream_unittest.cc
+++ b/audio/audio_receive_stream_unittest.cc
@@ -78,6 +78,7 @@
     /*jitterBufferDelayMs=*/789,
     /*jitterBufferEmittedCount=*/543,
     /*jitterBufferTargetDelayMs=*/123,
+    /*jitterBufferMinimumDelayMs=*/222,
     /*insertedSamplesForDeceleration=*/432,
     /*removedSamplesForAcceleration=*/321,
     /*fecPacketsReceived=*/123,
@@ -281,6 +282,9 @@
     EXPECT_EQ(static_cast<double>(kNetworkStats.jitterBufferTargetDelayMs) /
                   static_cast<double>(rtc::kNumMillisecsPerSec),
               stats.jitter_buffer_target_delay_seconds);
+    EXPECT_EQ(static_cast<double>(kNetworkStats.jitterBufferMinimumDelayMs) /
+                  static_cast<double>(rtc::kNumMillisecsPerSec),
+              stats.jitter_buffer_minimum_delay_seconds);
     EXPECT_EQ(kNetworkStats.insertedSamplesForDeceleration,
               stats.inserted_samples_for_deceleration);
     EXPECT_EQ(kNetworkStats.removedSamplesForAcceleration,
diff --git a/call/audio_receive_stream.h b/call/audio_receive_stream.h
index faa1f12..5d3c38f 100644
--- a/call/audio_receive_stream.h
+++ b/call/audio_receive_stream.h
@@ -59,6 +59,7 @@
     double jitter_buffer_delay_seconds = 0.0;
     uint64_t jitter_buffer_emitted_count = 0;
     double jitter_buffer_target_delay_seconds = 0.0;
+    double jitter_buffer_minimum_delay_seconds = 0.0;
     uint64_t inserted_samples_for_deceleration = 0;
     uint64_t removed_samples_for_acceleration = 0;
     // Stats below DO NOT correspond directly to anything in the WebRTC stats
diff --git a/media/base/media_channel.h b/media/base/media_channel.h
index db147e5..712b634 100644
--- a/media/base/media_channel.h
+++ b/media/base/media_channel.h
@@ -451,10 +451,15 @@
   // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-jitterbufferdelay
   double jitter_buffer_delay_seconds = 0.0;
   // Target delay for the jitter buffer (cumulative).
-  // TODO(https://crbug.com/webrtc/14244): This metric is only implemented for
+  // TODO(crbug.com/webrtc/14244): This metric is only implemented for
   // audio, it should be implemented for video as well.
   // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-jitterbuffertargetdelay
   absl::optional<double> jitter_buffer_target_delay_seconds;
+  // Minimum obtainable delay for the jitter buffer (cumulative).
+  // TODO(crbug.com/webrtc/14244): This metric is only implemented for
+  // audio, it should be implemented for video as well.
+  // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-jitterbufferminimumdelay
+  absl::optional<double> jitter_buffer_minimum_delay_seconds;
   // Number of observations for cumulative jitter latency.
   // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-jitterbufferemittedcount
   uint64_t jitter_buffer_emitted_count = 0;
diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc
index db8ec69..b34c77d 100644
--- a/media/engine/webrtc_voice_engine.cc
+++ b/media/engine/webrtc_voice_engine.cc
@@ -2368,6 +2368,8 @@
     rinfo.jitter_buffer_emitted_count = stats.jitter_buffer_emitted_count;
     rinfo.jitter_buffer_target_delay_seconds =
         stats.jitter_buffer_target_delay_seconds;
+    rinfo.jitter_buffer_minimum_delay_seconds =
+        stats.jitter_buffer_minimum_delay_seconds;
     rinfo.inserted_samples_for_deceleration =
         stats.inserted_samples_for_deceleration;
     rinfo.removed_samples_for_acceleration =
diff --git a/modules/audio_coding/acm2/acm_receiver.cc b/modules/audio_coding/acm2/acm_receiver.cc
index f70b37c..b078af1 100644
--- a/modules/audio_coding/acm2/acm_receiver.cc
+++ b/modules/audio_coding/acm2/acm_receiver.cc
@@ -287,6 +287,8 @@
   acm_stat->jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms;
   acm_stat->jitterBufferTargetDelayMs =
       neteq_lifetime_stat.jitter_buffer_target_delay_ms;
+  acm_stat->jitterBufferMinimumDelayMs =
+      neteq_lifetime_stat.jitter_buffer_minimum_delay_ms;
   acm_stat->jitterBufferEmittedCount =
       neteq_lifetime_stat.jitter_buffer_emitted_count;
   acm_stat->delayedPacketOutageSamples =
diff --git a/modules/audio_coding/include/audio_coding_module_typedefs.h b/modules/audio_coding/include/audio_coding_module_typedefs.h
index d2fe9ac..9d2fcfe 100644
--- a/modules/audio_coding/include/audio_coding_module_typedefs.h
+++ b/modules/audio_coding/include/audio_coding_module_typedefs.h
@@ -88,6 +88,7 @@
   uint64_t concealmentEvents;
   uint64_t jitterBufferDelayMs;
   uint64_t jitterBufferTargetDelayMs;
+  uint64_t jitterBufferMinimumDelayMs;
   uint64_t jitterBufferEmittedCount;
   uint64_t insertedSamplesForDeceleration;
   uint64_t removedSamplesForAcceleration;
diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc
index 3a656a7..558774d 100644
--- a/modules/audio_coding/neteq/decision_logic.cc
+++ b/modules/audio_coding/neteq/decision_logic.cc
@@ -227,6 +227,10 @@
   return target_delay_ms;
 }
 
+int DecisionLogic::UnlimitedTargetLevelMs() const {
+  return delay_manager_->UnlimitedTargetLevelMs();
+}
+
 int DecisionLogic::GetFilteredBufferLevel() const {
   if (config_.enable_stable_playout_delay) {
     return last_playout_delay_ms_ * sample_rate_khz_;
diff --git a/modules/audio_coding/neteq/decision_logic.h b/modules/audio_coding/neteq/decision_logic.h
index 1e7c9fe..2e55322 100644
--- a/modules/audio_coding/neteq/decision_logic.h
+++ b/modules/audio_coding/neteq/decision_logic.h
@@ -72,6 +72,8 @@
 
   int TargetLevelMs() const override;
 
+  int UnlimitedTargetLevelMs() const override;
+
   absl::optional<int> PacketArrived(int fs_hz,
                                     bool should_update_stats,
                                     const PacketArrivedInfo& info) override;
diff --git a/modules/audio_coding/neteq/delay_manager.cc b/modules/audio_coding/neteq/delay_manager.cc
index ba8f706..bf3a0f1 100644
--- a/modules/audio_coding/neteq/delay_manager.cc
+++ b/modules/audio_coding/neteq/delay_manager.cc
@@ -101,6 +101,7 @@
     target_level_ms_ = std::max(
         target_level_ms_, reorder_optimizer_->GetOptimalDelayMs().value_or(0));
   }
+  unlimited_target_level_ms_ = target_level_ms_;
   target_level_ms_ = std::max(target_level_ms_, effective_minimum_delay_ms_);
   if (maximum_delay_ms_ > 0) {
     target_level_ms_ = std::min(target_level_ms_, maximum_delay_ms_);
@@ -134,6 +135,10 @@
   return target_level_ms_;
 }
 
+int DelayManager::UnlimitedTargetLevelMs() const {
+  return unlimited_target_level_ms_;
+}
+
 bool DelayManager::IsValidMinimumDelay(int delay_ms) const {
   return 0 <= delay_ms && delay_ms <= MinimumDelayUpperBound();
 }
diff --git a/modules/audio_coding/neteq/delay_manager.h b/modules/audio_coding/neteq/delay_manager.h
index 1c4fe42..a333681 100644
--- a/modules/audio_coding/neteq/delay_manager.h
+++ b/modules/audio_coding/neteq/delay_manager.h
@@ -61,9 +61,15 @@
   // Resets all state.
   virtual void Reset();
 
-  // Gets the target buffer level in milliseconds.
+  // Gets the target buffer level in milliseconds. If a minimum or maximum delay
+  // has been set, the target delay reported here also respects the configured
+  // min/max delay.
   virtual int TargetDelayMs() const;
 
+  // Reports the target delay that would be used if no minimum/maximum delay
+  // would be set.
+  virtual int UnlimitedTargetLevelMs() const;
+
   // Notifies the DelayManager of how much audio data is carried in each packet.
   virtual int SetPacketAudioLength(int length_ms);
 
@@ -107,7 +113,8 @@
   int maximum_delay_ms_;            // Externally set maximum allowed delay.
 
   int packet_len_ms_ = 0;
-  int target_level_ms_;  // Currently preferred buffer level.
+  int target_level_ms_ = 0;  // Currently preferred buffer level.
+  int unlimited_target_level_ms_ = 0;
 };
 
 }  // namespace webrtc
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index 1637ae7..6a6367d 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -2012,7 +2012,8 @@
 
     RTC_DCHECK(controller_);
     stats_->JitterBufferDelay(packet_duration, waiting_time_ms,
-                              controller_->TargetLevelMs());
+                              controller_->TargetLevelMs(),
+                              controller_->UnlimitedTargetLevelMs());
 
     packet_list->push_back(std::move(*packet));  // Store packet in list.
     packet = absl::nullopt;  // Ensure it's never used after the move.
diff --git a/modules/audio_coding/neteq/statistics_calculator.cc b/modules/audio_coding/neteq/statistics_calculator.cc
index cd5d971..9c91c85 100644
--- a/modules/audio_coding/neteq/statistics_calculator.cc
+++ b/modules/audio_coding/neteq/statistics_calculator.cc
@@ -261,12 +261,16 @@
   lifetime_stats_.total_samples_received += num_samples;
 }
 
-void StatisticsCalculator::JitterBufferDelay(size_t num_samples,
-                                             uint64_t waiting_time_ms,
-                                             uint64_t target_delay_ms) {
+void StatisticsCalculator::JitterBufferDelay(
+    size_t num_samples,
+    uint64_t waiting_time_ms,
+    uint64_t target_delay_ms,
+    uint64_t unlimited_target_delay_ms) {
   lifetime_stats_.jitter_buffer_delay_ms += waiting_time_ms * num_samples;
   lifetime_stats_.jitter_buffer_target_delay_ms +=
       target_delay_ms * num_samples;
+  lifetime_stats_.jitter_buffer_minimum_delay_ms +=
+      unlimited_target_delay_ms * num_samples;
   lifetime_stats_.jitter_buffer_emitted_count += num_samples;
 }
 
diff --git a/modules/audio_coding/neteq/statistics_calculator.h b/modules/audio_coding/neteq/statistics_calculator.h
index e9b3d03..5091f31 100644
--- a/modules/audio_coding/neteq/statistics_calculator.h
+++ b/modules/audio_coding/neteq/statistics_calculator.h
@@ -84,7 +84,8 @@
   // Update jitter buffer delay counter.
   void JitterBufferDelay(size_t num_samples,
                          uint64_t waiting_time_ms,
-                         uint64_t target_delay_ms);
+                         uint64_t target_delay_ms,
+                         uint64_t unlimited_target_delay_ms);
 
   // Stores new packet waiting time in waiting time statistics.
   void StoreWaitingTime(int waiting_time_ms);
diff --git a/pc/rtc_stats_collector.cc b/pc/rtc_stats_collector.cc
index f79ad4e..4b92fb6 100644
--- a/pc/rtc_stats_collector.cc
+++ b/pc/rtc_stats_collector.cc
@@ -409,6 +409,10 @@
     inbound_stats->jitter_buffer_target_delay =
         *media_receiver_info.jitter_buffer_target_delay_seconds;
   }
+  if (media_receiver_info.jitter_buffer_minimum_delay_seconds) {
+    inbound_stats->jitter_buffer_minimum_delay =
+        *media_receiver_info.jitter_buffer_minimum_delay_seconds;
+  }
   inbound_stats->jitter_buffer_emitted_count =
       media_receiver_info.jitter_buffer_emitted_count;
   if (media_receiver_info.nacks_sent) {
diff --git a/pc/rtc_stats_collector_unittest.cc b/pc/rtc_stats_collector_unittest.cc
index a95d54b..5fe4fe7 100644
--- a/pc/rtc_stats_collector_unittest.cc
+++ b/pc/rtc_stats_collector_unittest.cc
@@ -2060,6 +2060,7 @@
   voice_media_info.receivers[0].jitter_ms = 4500;
   voice_media_info.receivers[0].jitter_buffer_delay_seconds = 1.0;
   voice_media_info.receivers[0].jitter_buffer_target_delay_seconds = 1.1;
+  voice_media_info.receivers[0].jitter_buffer_minimum_delay_seconds = 0.999;
   voice_media_info.receivers[0].jitter_buffer_emitted_count = 2;
   voice_media_info.receivers[0].total_samples_received = 3;
   voice_media_info.receivers[0].concealed_samples = 4;
@@ -2114,6 +2115,7 @@
   expected_audio.jitter = 4.5;
   expected_audio.jitter_buffer_delay = 1.0;
   expected_audio.jitter_buffer_target_delay = 1.1;
+  expected_audio.jitter_buffer_minimum_delay = 0.999;
   expected_audio.jitter_buffer_emitted_count = 2;
   expected_audio.total_samples_received = 3;
   expected_audio.concealed_samples = 4;
@@ -2180,6 +2182,7 @@
   video_media_info.receivers[0].jitter_ms = 1199;
   video_media_info.receivers[0].jitter_buffer_delay_seconds = 3.456;
   video_media_info.receivers[0].jitter_buffer_target_delay_seconds = 1.1;
+  video_media_info.receivers[0].jitter_buffer_minimum_delay_seconds = 0.999;
   video_media_info.receivers[0].jitter_buffer_emitted_count = 13;
 
   video_media_info.receivers[0].last_packet_received_timestamp_ms =
@@ -2241,6 +2244,7 @@
   expected_video.jitter = 1.199;
   expected_video.jitter_buffer_delay = 3.456;
   expected_video.jitter_buffer_target_delay = 1.1;
+  expected_video.jitter_buffer_minimum_delay = 0.999;
   expected_video.jitter_buffer_emitted_count = 13;
   // `expected_video.last_packet_received_timestamp` should be undefined.
   // `expected_video.content_type` should be undefined.
diff --git a/pc/rtc_stats_integrationtest.cc b/pc/rtc_stats_integrationtest.cc
index 43d75ff..4cb9c15 100644
--- a/pc/rtc_stats_integrationtest.cc
+++ b/pc/rtc_stats_integrationtest.cc
@@ -845,6 +845,8 @@
       verifier.TestMemberIsUndefined(
           inbound_stream.removed_samples_for_acceleration);
       verifier.TestMemberIsUndefined(inbound_stream.jitter_buffer_target_delay);
+      verifier.TestMemberIsUndefined(
+          inbound_stream.jitter_buffer_minimum_delay);
       verifier.TestMemberIsUndefined(inbound_stream.audio_level);
       verifier.TestMemberIsUndefined(inbound_stream.total_audio_energy);
       verifier.TestMemberIsUndefined(inbound_stream.total_samples_duration);
@@ -870,6 +872,8 @@
           inbound_stream.removed_samples_for_acceleration);
       verifier.TestMemberIsNonNegative<double>(
           inbound_stream.jitter_buffer_target_delay);
+      verifier.TestMemberIsNonNegative<double>(
+          inbound_stream.jitter_buffer_minimum_delay);
       verifier.TestMemberIsPositive<double>(inbound_stream.audio_level);
       verifier.TestMemberIsPositive<double>(inbound_stream.total_audio_energy);
       verifier.TestMemberIsPositive<double>(
diff --git a/stats/rtcstats_objects.cc b/stats/rtcstats_objects.cc
index d0e8f1f..d05c566 100644
--- a/stats/rtcstats_objects.cc
+++ b/stats/rtcstats_objects.cc
@@ -648,6 +648,7 @@
     &last_packet_received_timestamp,
     &jitter_buffer_delay,
     &jitter_buffer_target_delay,
+    &jitter_buffer_minimum_delay,
     &jitter_buffer_emitted_count,
     &total_samples_received,
     &concealed_samples,
@@ -699,6 +700,7 @@
       last_packet_received_timestamp("lastPacketReceivedTimestamp"),
       jitter_buffer_delay("jitterBufferDelay"),
       jitter_buffer_target_delay("jitterBufferTargetDelay"),
+      jitter_buffer_minimum_delay("jitterBufferMinimumDelay"),
       jitter_buffer_emitted_count("jitterBufferEmittedCount"),
       total_samples_received("totalSamplesReceived"),
       concealed_samples("concealedSamples"),
@@ -746,6 +748,7 @@
       last_packet_received_timestamp(other.last_packet_received_timestamp),
       jitter_buffer_delay(other.jitter_buffer_delay),
       jitter_buffer_target_delay(other.jitter_buffer_target_delay),
+      jitter_buffer_minimum_delay(other.jitter_buffer_minimum_delay),
       jitter_buffer_emitted_count(other.jitter_buffer_emitted_count),
       total_samples_received(other.total_samples_received),
       concealed_samples(other.concealed_samples),