Returning correct duration estimate on Opus DTX packets.

Bug 4985 revealed two flaws
1. Opus duration estimate did not return correct length for DTX packets,

2. NetEq DoCodecInternalCng did not assign enough buffer.

P.S.
Generalizing problem 1, current NetEq decode function checks memory size by calling the duration estimate function. This is not ideal. A better way is to let codec's decode function to receive buffer size and return failure if it is not enough. This can be made in a separate CL.

BUG=webrtc:4985
R=henrik.lundin@webrtc.org

Review URL: https://codereview.webrtc.org/1334303005 .

Cr-Commit-Position: refs/heads/master@{#10031}
diff --git a/webrtc/modules/audio_coding/codecs/audio_decoder.h b/webrtc/modules/audio_coding/codecs/audio_decoder.h
index 556268c..b277d4b 100644
--- a/webrtc/modules/audio_coding/codecs/audio_decoder.h
+++ b/webrtc/modules/audio_coding/codecs/audio_decoder.h
@@ -61,7 +61,8 @@
   virtual bool HasDecodePlc() const;
 
   // Calls the packet-loss concealment of the decoder to update the state after
-  // one or several lost packets.
+  // one or several lost packets. The caller has to make sure that the
+  // memory allocated in |decoded| should accommodate |num_frames| frames.
   virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
 
   // Resets the decoder state (empty buffers etc.).
diff --git a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h
index ded8b6f..9c09ae8 100644
--- a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h
+++ b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h
@@ -294,6 +294,20 @@
                            const uint8_t* payload,
                            size_t payload_length_bytes);
 
+/****************************************************************************
+ * WebRtcOpus_PlcDuration(...)
+ *
+ * This function calculates the duration of a frame returned by packet loss
+ * concealment (PLC).
+ *
+ * Input:
+ *        - inst                 : Decoder context
+ *
+ * Return value                  : The duration of a frame returned by PLC, in
+ *                                 samples per channel.
+ */
+int WebRtcOpus_PlcDuration(OpusDecInst* inst);
+
 /* TODO(minyue): Check whether it is needed to add a decoder context to the
  * arguments, like WebRtcOpus_DurationEst(...). In fact, the packet itself tells
  * the duration. The decoder context in WebRtcOpus_DurationEst(...) is not used.
diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
index 4f6e22f..2ac5373 100644
--- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
@@ -359,6 +359,12 @@
 int WebRtcOpus_DurationEst(OpusDecInst* inst,
                            const uint8_t* payload,
                            size_t payload_length_bytes) {
+  if (payload_length_bytes == 0) {
+    // WebRtcOpus_Decode calls PLC when payload length is zero. So we return
+    // PLC duration correspondingly.
+    return WebRtcOpus_PlcDuration(inst);
+  }
+
   int frames, samples;
   frames = opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes);
   if (frames < 0) {
@@ -373,6 +379,15 @@
   return samples;
 }
 
+int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
+  /* The number of samples we ask for is |number_of_lost_frames| times
+   * |prev_decoded_samples_|. Limit the number of samples to maximum
+   * |kWebRtcOpusMaxFrameSizePerChannel|. */
+  const int plc_samples = inst->prev_decoded_samples;
+  return (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
+      plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
+}
+
 int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                               size_t payload_length_bytes) {
   int samples;
diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
index d2fd009..db622a7 100644
--- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
@@ -105,9 +105,12 @@
                                             kMaxBytes, bitstream_);
   EXPECT_GE(encoded_bytes_int, 0);
   encoded_bytes_ = static_cast<size_t>(encoded_bytes_int);
-  return WebRtcOpus_Decode(decoder, bitstream_,
-                           encoded_bytes_, output_audio,
-                           audio_type);
+  int est_len = WebRtcOpus_DurationEst(decoder, bitstream_, encoded_bytes_);
+  int act_len = WebRtcOpus_Decode(decoder, bitstream_,
+                                  encoded_bytes_, output_audio,
+                                  audio_type);
+  EXPECT_EQ(est_len, act_len);
+  return act_len;
 }
 
 // Test if encoder/decoder can enter DTX mode properly and do not enter DTX when
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index 02e9324..7c049b0 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -765,7 +765,7 @@
       // This handles the case when there is no transmission and the decoder
       // should produce internal comfort noise.
       // TODO(hlundin): Write test for codec-internal CNG.
-      DoCodecInternalCng();
+      DoCodecInternalCng(decoded_buffer_.get(), length);
       break;
     }
     case kDtmf: {
@@ -1163,7 +1163,11 @@
                       int* decoded_length,
                       AudioDecoder::SpeechType* speech_type) {
   *speech_type = AudioDecoder::kSpeech;
-  AudioDecoder* decoder = NULL;
+
+  // When packet_list is empty, we may be in kCodecInternalCng mode, and for
+  // that we use current active decoder.
+  AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
+
   if (!packet_list->empty()) {
     const Packet* packet = packet_list->front();
     uint8_t payload_type = packet->header.payloadType;
@@ -1231,8 +1235,14 @@
     decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]);
   }
 
-  int return_value = DecodeLoop(packet_list, operation, decoder,
-                                decoded_length, speech_type);
+  int return_value;
+  if (*operation == kCodecInternalCng) {
+    RTC_DCHECK(packet_list->empty());
+    return_value = DecodeCng(decoder, decoded_length, speech_type);
+  } else {
+    return_value = DecodeLoop(packet_list, *operation, decoder,
+                              decoded_length, speech_type);
+  }
 
   if (*decoded_length < 0) {
     // Error returned from the decoder.
@@ -1266,13 +1276,45 @@
   return return_value;
 }
 
-int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation,
+int NetEqImpl::DecodeCng(AudioDecoder* decoder, int* decoded_length,
+                         AudioDecoder::SpeechType* speech_type) {
+  if (!decoder) {
+    // This happens when active decoder is not defined.
+    *decoded_length = -1;
+    return 0;
+  }
+
+  while (*decoded_length < rtc::checked_cast<int>(output_size_samples_)) {
+    const int length = decoder->Decode(
+            nullptr, 0, fs_hz_,
+            (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
+            &decoded_buffer_[*decoded_length], speech_type);
+    if (length > 0) {
+      *decoded_length += length;
+      LOG(LS_VERBOSE) << "Decoded " << length << " CNG samples";
+    } else {
+      // Error.
+      LOG(LS_WARNING) << "Failed to decode CNG";
+      *decoded_length = -1;
+      break;
+    }
+    if (*decoded_length > static_cast<int>(decoded_buffer_length_)) {
+      // Guard against overflow.
+      LOG(LS_WARNING) << "Decoded too much CNG.";
+      return kDecodedTooMuch;
+    }
+  }
+  return 0;
+}
+
+int NetEqImpl::DecodeLoop(PacketList* packet_list, const Operations& operation,
                           AudioDecoder* decoder, int* decoded_length,
                           AudioDecoder::SpeechType* speech_type) {
   Packet* packet = NULL;
   if (!packet_list->empty()) {
     packet = packet_list->front();
   }
+
   // Do decoding.
   while (packet &&
       !decoder_database_->IsComfortNoise(packet->header.payloadType)) {
@@ -1281,9 +1323,9 @@
     // number decoder channels.
     assert(sync_buffer_->Channels() == decoder->Channels());
     assert(decoded_buffer_length_ >= kMaxFrameSize * decoder->Channels());
-    assert(*operation == kNormal || *operation == kAccelerate ||
-           *operation == kFastAccelerate || *operation == kMerge ||
-           *operation == kPreemptiveExpand);
+    assert(operation == kNormal || operation == kAccelerate ||
+           operation == kFastAccelerate || operation == kMerge ||
+           operation == kPreemptiveExpand);
     packet_list->pop_front();
     size_t payload_length = packet->payload_length;
     int decode_length;
@@ -1643,21 +1685,12 @@
   return 0;
 }
 
-void NetEqImpl::DoCodecInternalCng() {
-  int length = 0;
-  // TODO(hlundin): Will probably need a longer buffer for multi-channel.
-  int16_t decoded_buffer[kMaxFrameSize];
-  AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
-  if (decoder) {
-    const uint8_t* dummy_payload = NULL;
-    AudioDecoder::SpeechType speech_type;
-    length = decoder->Decode(
-        dummy_payload, 0, fs_hz_, kMaxFrameSize * sizeof(int16_t),
-        decoded_buffer, &speech_type);
-  }
-  assert(mute_factor_array_.get());
-  normal_->Process(decoded_buffer, length, last_mode_, mute_factor_array_.get(),
-                   algorithm_buffer_.get());
+void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer,
+                                   size_t decoded_length) {
+  RTC_DCHECK(normal_.get());
+  RTC_DCHECK(mute_factor_array_.get());
+  normal_->Process(decoded_buffer, decoded_length, last_mode_,
+                   mute_factor_array_.get(), algorithm_buffer_.get());
   last_mode_ = kModeCodecInternalCng;
   expand_->Reset();
 }
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index effecba..d7c9ac4 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -243,9 +243,14 @@
              AudioDecoder::SpeechType* speech_type)
       EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
 
+  // Sub-method to Decode(). Performs codec internal CNG.
+  int DecodeCng(AudioDecoder* decoder, int* decoded_length,
+                AudioDecoder::SpeechType* speech_type)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
+
   // Sub-method to Decode(). Performs the actual decoding.
   int DecodeLoop(PacketList* packet_list,
-                 Operations* operation,
+                 const Operations& operation,
                  AudioDecoder* decoder,
                  int* decoded_length,
                  AudioDecoder::SpeechType* speech_type)
@@ -290,7 +295,8 @@
 
   // Calls the audio decoder to generate codec-internal comfort noise when
   // no packet was received.
-  void DoCodecInternalCng() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
+  void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
 
   // Calls the DtmfToneGenerator class to generate DTMF tones.
   int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
index 3a34692..5489fed 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -1005,4 +1005,245 @@
 
   EXPECT_CALL(mock_decoder, Die());
 }
-}  // namespace webrtc
+
+// This test checks the behavior of NetEq when audio decoder fails.
+TEST_F(NetEqImplTest, DecodingError) {
+  UseNoMocks();
+  CreateInstance();
+
+  const uint8_t kPayloadType = 17;   // Just an arbitrary number.
+  const uint32_t kReceiveTime = 17;  // Value doesn't matter for this test.
+  const int kSampleRateHz = 8000;
+  const int kDecoderErrorCode = -97;  // Any negative number.
+
+  // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms.
+  const size_t kFrameLengthSamples =
+      static_cast<size_t>(5 * kSampleRateHz / 1000);
+
+  const size_t kPayloadLengthBytes = 1;  // This can be arbitrary.
+
+  uint8_t payload[kPayloadLengthBytes] = {0};
+
+  WebRtcRTPHeader rtp_header;
+  rtp_header.header.payloadType = kPayloadType;
+  rtp_header.header.sequenceNumber = 0x1234;
+  rtp_header.header.timestamp = 0x12345678;
+  rtp_header.header.ssrc = 0x87654321;
+
+  // Create a mock decoder object.
+  MockAudioDecoder mock_decoder;
+  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
+  EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1));
+  EXPECT_CALL(mock_decoder, IncomingPacket(_, kPayloadLengthBytes, _, _, _))
+      .WillRepeatedly(Return(0));
+  EXPECT_CALL(mock_decoder, PacketDuration(_, _))
+      .WillRepeatedly(Return(kFrameLengthSamples));
+  EXPECT_CALL(mock_decoder, ErrorCode())
+      .WillOnce(Return(kDecoderErrorCode));
+  EXPECT_CALL(mock_decoder, HasDecodePlc())
+      .WillOnce(Return(false));
+  int16_t dummy_output[kFrameLengthSamples] = {0};
+
+  {
+    InSequence sequence;  // Dummy variable.
+    // Mock decoder works normally the first time.
+    EXPECT_CALL(mock_decoder,
+                Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _))
+        .Times(3)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<4>(dummy_output,
+                                      dummy_output + kFrameLengthSamples),
+                  SetArgPointee<5>(AudioDecoder::kSpeech),
+                  Return(kFrameLengthSamples)))
+        .RetiresOnSaturation();
+
+    // Then mock decoder fails. A common reason for failure can be buffer being
+    // too short
+    EXPECT_CALL(mock_decoder,
+                Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _))
+        .WillOnce(Return(-1))
+        .RetiresOnSaturation();
+
+    // Mock decoder finally returns to normal.
+    EXPECT_CALL(mock_decoder,
+                Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _))
+        .Times(2)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<4>(dummy_output,
+                                  dummy_output + kFrameLengthSamples),
+                  SetArgPointee<5>(AudioDecoder::kSpeech),
+                  Return(kFrameLengthSamples)));
+  }
+
+  EXPECT_EQ(NetEq::kOK,
+            neteq_->RegisterExternalDecoder(&mock_decoder, kDecoderPCM16B,
+                                            kPayloadType, kSampleRateHz));
+
+  // Insert packets.
+  for (int i = 0; i < 6; ++i) {
+    rtp_header.header.sequenceNumber += 1;
+    rtp_header.header.timestamp += kFrameLengthSamples;
+    EXPECT_EQ(NetEq::kOK,
+              neteq_->InsertPacket(rtp_header, payload, kPayloadLengthBytes,
+                                   kReceiveTime));
+  }
+
+  // Pull audio.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  int16_t output[kMaxOutputSize];
+  size_t samples_per_channel;
+  int num_channels;
+  NetEqOutputType type;
+  EXPECT_EQ(NetEq::kOK,
+            neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
+                             &num_channels, &type));
+  EXPECT_EQ(kMaxOutputSize, samples_per_channel);
+  EXPECT_EQ(1, num_channels);
+  EXPECT_EQ(kOutputNormal, type);
+
+  // Pull audio again. Decoder fails.
+  EXPECT_EQ(NetEq::kFail,
+            neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
+                             &num_channels, &type));
+  EXPECT_EQ(NetEq::kDecoderErrorCode, neteq_->LastError());
+  EXPECT_EQ(kDecoderErrorCode, neteq_->LastDecoderError());
+  EXPECT_EQ(kMaxOutputSize, samples_per_channel);
+  EXPECT_EQ(1, num_channels);
+  // TODO(minyue): should NetEq better give kOutputPLC, since it is actually an
+  // expansion.
+  EXPECT_EQ(kOutputNormal, type);
+
+  // Pull audio again, should continue an expansion.
+  EXPECT_EQ(NetEq::kOK,
+            neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
+                             &num_channels, &type));
+  EXPECT_EQ(kMaxOutputSize, samples_per_channel);
+  EXPECT_EQ(1, num_channels);
+  EXPECT_EQ(kOutputPLC, type);
+
+  // Pull audio again, should behave normal.
+  EXPECT_EQ(NetEq::kOK,
+            neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
+                             &num_channels, &type));
+  EXPECT_EQ(kMaxOutputSize, samples_per_channel);
+  EXPECT_EQ(1, num_channels);
+  EXPECT_EQ(kOutputNormal, type);
+
+  EXPECT_CALL(mock_decoder, Die());
+}
+
+// This test checks the behavior of NetEq when audio decoder fails during CNG.
+TEST_F(NetEqImplTest, DecodingErrorDuringInternalCng) {
+  UseNoMocks();
+  CreateInstance();
+
+  const uint8_t kPayloadType = 17;   // Just an arbitrary number.
+  const uint32_t kReceiveTime = 17;  // Value doesn't matter for this test.
+  const int kSampleRateHz = 8000;
+  const int kDecoderErrorCode = -97;  // Any negative number.
+
+  // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms.
+  const size_t kFrameLengthSamples =
+      static_cast<size_t>(5 * kSampleRateHz / 1000);
+
+  const size_t kPayloadLengthBytes = 1;  // This can be arbitrary.
+
+  uint8_t payload[kPayloadLengthBytes] = {0};
+
+  WebRtcRTPHeader rtp_header;
+  rtp_header.header.payloadType = kPayloadType;
+  rtp_header.header.sequenceNumber = 0x1234;
+  rtp_header.header.timestamp = 0x12345678;
+  rtp_header.header.ssrc = 0x87654321;
+
+  // Create a mock decoder object.
+  MockAudioDecoder mock_decoder;
+  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
+  EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1));
+  EXPECT_CALL(mock_decoder, IncomingPacket(_, kPayloadLengthBytes, _, _, _))
+      .WillRepeatedly(Return(0));
+  EXPECT_CALL(mock_decoder, PacketDuration(_, _))
+      .WillRepeatedly(Return(kFrameLengthSamples));
+  EXPECT_CALL(mock_decoder, ErrorCode())
+      .WillOnce(Return(kDecoderErrorCode));
+  int16_t dummy_output[kFrameLengthSamples] = {0};
+
+  {
+    InSequence sequence;  // Dummy variable.
+    // Mock decoder works normally the first 2 times.
+    EXPECT_CALL(mock_decoder,
+                Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _))
+        .Times(2)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<4>(dummy_output,
+                                      dummy_output + kFrameLengthSamples),
+                  SetArgPointee<5>(AudioDecoder::kComfortNoise),
+                  Return(kFrameLengthSamples)))
+        .RetiresOnSaturation();
+
+    // Then mock decoder fails. A common reason for failure can be buffer being
+    // too short
+    EXPECT_CALL(mock_decoder, Decode(nullptr, 0, kSampleRateHz, _, _, _))
+        .WillOnce(Return(-1))
+        .RetiresOnSaturation();
+
+    // Mock decoder finally returns to normal.
+    EXPECT_CALL(mock_decoder, Decode(nullptr, 0, kSampleRateHz, _, _, _))
+        .Times(2)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<4>(dummy_output,
+                                  dummy_output + kFrameLengthSamples),
+                  SetArgPointee<5>(AudioDecoder::kComfortNoise),
+                  Return(kFrameLengthSamples)));
+  }
+
+  EXPECT_EQ(NetEq::kOK,
+            neteq_->RegisterExternalDecoder(&mock_decoder, kDecoderPCM16B,
+                                            kPayloadType, kSampleRateHz));
+
+  // Insert 2 packets. This will make netEq into codec internal CNG mode.
+  for (int i = 0; i < 2; ++i) {
+    rtp_header.header.sequenceNumber += 1;
+    rtp_header.header.timestamp += kFrameLengthSamples;
+    EXPECT_EQ(NetEq::kOK,
+              neteq_->InsertPacket(rtp_header, payload, kPayloadLengthBytes,
+                                   kReceiveTime));
+  }
+
+  // Pull audio.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  int16_t output[kMaxOutputSize];
+  size_t samples_per_channel;
+  int num_channels;
+  NetEqOutputType type;
+  EXPECT_EQ(NetEq::kOK,
+            neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
+                             &num_channels, &type));
+  EXPECT_EQ(kMaxOutputSize, samples_per_channel);
+  EXPECT_EQ(1, num_channels);
+  EXPECT_EQ(kOutputCNG, type);
+
+  // Pull audio again. Decoder fails.
+  EXPECT_EQ(NetEq::kFail,
+            neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
+                             &num_channels, &type));
+  EXPECT_EQ(NetEq::kDecoderErrorCode, neteq_->LastError());
+  EXPECT_EQ(kDecoderErrorCode, neteq_->LastDecoderError());
+  EXPECT_EQ(kMaxOutputSize, samples_per_channel);
+  EXPECT_EQ(1, num_channels);
+  // TODO(minyue): should NetEq better give kOutputPLC, since it is actually an
+  // expansion.
+  EXPECT_EQ(kOutputCNG, type);
+
+  // Pull audio again, should resume codec CNG.
+  EXPECT_EQ(NetEq::kOK,
+            neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
+                             &num_channels, &type));
+  EXPECT_EQ(kMaxOutputSize, samples_per_channel);
+  EXPECT_EQ(1, num_channels);
+  EXPECT_EQ(kOutputCNG, type);
+
+  EXPECT_CALL(mock_decoder, Die());
+}
+
+}// namespace webrtc