Fix jitter buffer bug around out-of-order packets and non-RTX padding.

tl;dr - non-continuous frames (due to padding) would get stuck as incomplete if the previous complete frame arrived and was decoded before the padding arrived.This fix re-checks the incomplete frame list for continuous frames after old packets arrive.

When padding is enabled and RTX is not, padding is sent as empty RTP packets tacked onto the end of completed frames (meaning: same timestamp, but after a packet with the marker bit set). Given the following set of circumstances, codified in the new unit test method, a frame can get permanently stuck in the incomplete frames list:

- Frame A decoded (packets 94-95). Next expected sequence number is 96.
- Frame C arrives (packets 100-101) and is marked complete. It isn't continuous, since it starts at 100, so it's placed in the incomplete frame list.
- Frame B arrives (packets 96-97) and is complete, since 97 has a marker bit.  Turns out that packets 98-99 are padding, but the receiver doesn't know that.
- Frame B is decoded, removed from the decodable frames list, and last decoded state is updated.
- Packets 98-99 arrive. They hit the IsOldPacket check and update the last decoded state, but they don't trigger FindAndInsertContinuousFrames.
- Further packets/frames arrive and complete, but FindAndInsertContinuousFrames only runs on frames that are newer than the newly completed frame.

In this state, Frame C is permanently stuck as incomplete, so the jitter buffer overall is stuck until max NACK age (default: 450 packets), the max NACK list size (default: 200 packets), or a keyframe arrives and IsContinuous returns true for the keyframe.

(Before the November refactoring, Frame B wouldn't have to have been decoded for the bug to trigger; just having a complete continuous frame at any time before the padding arrived would cause this state, as FindAndInsertContinuousFrames was only called when the frame originally became continuous and was inserted into the decodable frames list. Post refactoring, the frame is removed/re-added to the decodable list on every padding packet that arrives)

BUG=
R=stefan@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/50959004

Cr-Commit-Position: refs/heads/master@{#9264}
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.cc b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
index cd914be..686b182 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.cc
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
@@ -573,6 +573,9 @@
     last_decoded_state_.UpdateOldPacket(&packet);
     DropPacketsFromNackList(last_decoded_state_.sequence_num());
 
+    // Also see if this old packet made more incomplete frames continuous.
+    FindAndInsertContinuousFramesWithState(last_decoded_state_);
+
     if (num_consecutive_old_packets_ > kMaxConsecutiveOldPackets) {
       LOG(LS_WARNING)
           << num_consecutive_old_packets_
@@ -749,6 +752,16 @@
   VCMDecodingState decoding_state;
   decoding_state.CopyFrom(last_decoded_state_);
   decoding_state.SetState(&new_frame);
+  FindAndInsertContinuousFramesWithState(decoding_state);
+}
+
+void VCMJitterBuffer::FindAndInsertContinuousFramesWithState(
+    const VCMDecodingState& original_decoded_state) {
+  // Copy original_decoded_state so we can move the state forward with each
+  // decodable frame we find.
+  VCMDecodingState decoding_state;
+  decoding_state.CopyFrom(original_decoded_state);
+
   // When temporal layers are available, we search for a complete or decodable
   // frame until we hit one of the following:
   // 1. Continuous base or sync layer.
@@ -756,7 +769,8 @@
   for (FrameList::iterator it = incomplete_frames_.begin();
        it != incomplete_frames_.end();)  {
     VCMFrameBuffer* frame = it->second;
-    if (IsNewerTimestamp(new_frame.TimeStamp(), frame->TimeStamp())) {
+    if (IsNewerTimestamp(original_decoded_state.time_stamp(),
+                         frame->TimeStamp())) {
       ++it;
       continue;
     }
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.h b/webrtc/modules/video_coding/main/source/jitter_buffer.h
index d2cbc27..5ccc92e 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.h
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.h
@@ -214,6 +214,12 @@
   // all decodable frames into account.
   bool IsContinuous(const VCMFrameBuffer& frame) const
       EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
+  // Looks for frames in |incomplete_frames_| which are continuous in the
+  // provided |decoded_state|. Starts the search from the timestamp of
+  // |decoded_state|.
+  void FindAndInsertContinuousFramesWithState(
+      const VCMDecodingState& decoded_state)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
   // Looks for frames in |incomplete_frames_| which are continuous in
   // |last_decoded_state_| taking all decodable frames into account. Starts
   // the search from |new_frame|.
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc b/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc
index d70973e..36abf6d 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc
@@ -552,6 +552,63 @@
   jitter_buffer_->ReleaseFrame(frame_out);
 }
 
+TEST_F(TestBasicJitterBuffer, TestReorderingWithPadding) {
+  packet_->frameType = kVideoFrameKey;
+  packet_->isFirstPacket = true;
+  packet_->markerBit = true;
+
+  // Send in an initial good packet/frame (Frame A) to start things off.
+  bool retransmitted = false;
+  EXPECT_EQ(kCompleteSession,
+            jitter_buffer_->InsertPacket(*packet_, &retransmitted));
+  VCMEncodedFrame* frame_out = DecodeCompleteFrame();
+  EXPECT_TRUE(frame_out != NULL);
+  jitter_buffer_->ReleaseFrame(frame_out);
+
+  // Now send in a complete delta frame (Frame C), but with a sequence number
+  // gap. No pic index either, so no temporal scalability cheating :)
+  packet_->frameType = kVideoFrameDelta;
+  // Leave a gap of 2 sequence numbers and two frames.
+  packet_->seqNum = seq_num_ + 3;
+  packet_->timestamp = timestamp_ + (66 * 90);
+  // Still isFirst = marker = true.
+  // Session should be complete (frame is complete), but there's nothing to
+  // decode yet.
+  EXPECT_EQ(kCompleteSession,
+            jitter_buffer_->InsertPacket(*packet_, &retransmitted));
+  frame_out = DecodeCompleteFrame();
+  EXPECT_TRUE(frame_out == NULL);
+
+  // Now send in a complete delta frame (Frame B) that is continuous from A, but
+  // doesn't fill the full gap to C. The rest of the gap is going to be padding.
+  packet_->seqNum = seq_num_ + 1;
+  packet_->timestamp = timestamp_ + (33 * 90);
+  // Still isFirst = marker = true.
+  EXPECT_EQ(kCompleteSession,
+            jitter_buffer_->InsertPacket(*packet_, &retransmitted));
+  frame_out = DecodeCompleteFrame();
+  EXPECT_TRUE(frame_out != NULL);
+  jitter_buffer_->ReleaseFrame(frame_out);
+
+  // But Frame C isn't continuous yet.
+  frame_out = DecodeCompleteFrame();
+  EXPECT_TRUE(frame_out == NULL);
+
+  // Add in the padding. These are empty packets (data length is 0) with no
+  // marker bit and matching the timestamp of Frame B.
+  VCMPacket empty_packet(data_, 0, seq_num_ + 2, timestamp_ + (33 * 90), false);
+  EXPECT_EQ(kOldPacket,
+            jitter_buffer_->InsertPacket(empty_packet, &retransmitted));
+  empty_packet.seqNum += 1;
+  EXPECT_EQ(kOldPacket,
+            jitter_buffer_->InsertPacket(empty_packet, &retransmitted));
+
+  // But now Frame C should be ready!
+  frame_out = DecodeCompleteFrame();
+  EXPECT_TRUE(frame_out != NULL);
+  jitter_buffer_->ReleaseFrame(frame_out);
+}
+
 TEST_F(TestBasicJitterBuffer, DuplicatePackets) {
   packet_->frameType = kVideoFrameKey;
   packet_->isFirstPacket = true;