Fix a number of things in AudioEncoderDecoderIsac* - Add max_bit_rate and max_payload_size_bytes to config structs. - Fix support for 48 kHz sample rate. - Fix iSAC-RED. - Add method UpdateDecoderSampleRate(). - Update locking structure with a separate lock for local member variables used by the encoder methods. BUG=3926 COAUTHOR:kwiberg@webrtc.org R=minyue@webrtc.org Review URL: https://webrtc-codereview.appspot.com/41659004 Cr-Commit-Position: refs/heads/master@{#8204} git-svn-id: http://webrtc.googlecode.com/svn/trunk@8204 4adac7df-926f-26a2-2b94-8c16560cd09d

commit: bdebccf384db689931ba3df9afbcd59c85ddb8e2 [log] [tgz]
author: henrik.lundin@webrtc.org <henrik.lundin@webrtc.org> Thu Jan 29 14:10:32 2015 +0000
committer: henrik.lundin@webrtc.org <henrik.lundin@webrtc.org> Thu Jan 29 14:11:09 2015 +0000
tree: 48e9edd1cd287983494e915d59d617e93abf0aed
parent: 18e758526d7cb3c50975b11ed9e04f8f70306a9d [diff]
diff --git a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h
index 712d3e6..65a1204 100644
--- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h
+++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h

@@ -29,34 +29,45 @@
   // are
   //  - 16000 Hz, 30 ms, 10000-32000 bps
   //  - 16000 Hz, 60 ms, 10000-32000 bps
-  //  - 32000 Hz, 30 ms, 10000-56000 bps (if T has 32 kHz support)
+  //  - 32000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
+  //  - 48000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
   struct Config {
     Config();
     bool IsOk() const;
     int payload_type;
+    int red_payload_type;
     int sample_rate_hz;
     int frame_size_ms;
     int bit_rate;  // Limit on the short-term average bit rate, in bits/second.
+    int max_bit_rate;
+    int max_payload_size_bytes;
   };
 
-  // For constructing an encoder in channel-adaptive mode. The sample rate must
-  // be 16000 Hz; the initial frame size can be 30 or 60 ms; and the initial
-  // bit rate can be 10000-56000 bps if T has 32 kHz support, 10000-32000 bps
-  // otherwise.
+  // For constructing an encoder in channel-adaptive mode. Allowed combinations
+  // are
+  //  - 16000 Hz, 30 ms, 10000-32000 bps
+  //  - 16000 Hz, 60 ms, 10000-32000 bps
+  //  - 32000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
+  //  - 48000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
   struct ConfigAdaptive {
     ConfigAdaptive();
     bool IsOk() const;
     int payload_type;
+    int red_payload_type;
     int sample_rate_hz;
     int initial_frame_size_ms;
     int initial_bit_rate;
+    int max_bit_rate;
     bool enforce_frame_size;  // Prevent adaptive changes to the frame size?
+    int max_payload_size_bytes;
   };
 
   explicit AudioEncoderDecoderIsacT(const Config& config);
   explicit AudioEncoderDecoderIsacT(const ConfigAdaptive& config);
   virtual ~AudioEncoderDecoderIsacT() OVERRIDE;
 
+  void UpdateDecoderSampleRate(int sample_rate_hz);
+
   // AudioEncoder public methods.
   virtual int sample_rate_hz() const OVERRIDE;
   virtual int num_channels() const OVERRIDE;
@@ -91,24 +102,39 @@
                               EncodedInfo* info) OVERRIDE;
 
  private:
+  // This value is taken from STREAM_SIZE_MAX_60 for iSAC float (60 ms) and
+  // STREAM_MAXW16_60MS for iSAC fix (60 ms).
+  static const size_t kSufficientEncodeBufferSizeBytes = 400;
+
   const int payload_type_;
+  const int red_payload_type_;
 
   // iSAC encoder/decoder state, guarded by a mutex to ensure that encode calls
   // from one thread won't clash with decode calls from another thread.
+  // Note: PT_GUARDED_BY is disabled since it is not yet supported by clang.
+  const scoped_ptr<CriticalSectionWrapper> state_lock_;
+  typename T::instance_type* isac_state_
+      GUARDED_BY(state_lock_) /* PT_GUARDED_BY(lock_)*/;
+
+  // Must be acquired before state_lock_.
   const scoped_ptr<CriticalSectionWrapper> lock_;
-  typename T::instance_type* isac_state_ GUARDED_BY(lock_);
 
   // Have we accepted input but not yet emitted it in a packet?
-  bool packet_in_progress_;
-
-  // Working on the very first output frame.
-  bool first_output_frame_;
+  bool packet_in_progress_ GUARDED_BY(lock_);
 
   // Timestamp of the first input of the currently in-progress packet.
-  uint32_t packet_timestamp_;
+  uint32_t packet_timestamp_ GUARDED_BY(lock_);
 
   // Timestamp of the previously encoded packet.
-  uint32_t last_encoded_timestamp_;
+  uint32_t last_encoded_timestamp_ GUARDED_BY(lock_);
+
+  // Redundant encoding from last time.
+  // Note: If has_redundant_encoder is false, we set the array length to 1,
+  // since zero-length arrays are not supported by all compilers.
+  uint8_t redundant_payload_[T::has_redundant_encoder
+                                 ? kSufficientEncodeBufferSizeBytes
+                                 : 1] GUARDED_BY(lock_);
+  size_t redundant_length_bytes_ GUARDED_BY(lock_);
 
   DISALLOW_COPY_AND_ASSIGN(AudioEncoderDecoderIsacT);
 };

diff --git a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h
index 5b337c1..b45fb26 100644
--- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h
+++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h

@@ -13,29 +13,49 @@
 
 #include "webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h"
 
+#include <algorithm>
+
+#include "webrtc/base/checks.h"
 #include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
 
 namespace webrtc {
 
 const int kIsacPayloadType = 103;
+const int kInvalidPayloadType = -1;
 
 template <typename T>
 AudioEncoderDecoderIsacT<T>::Config::Config()
     : payload_type(kIsacPayloadType),
+      red_payload_type(kInvalidPayloadType),
       sample_rate_hz(16000),
       frame_size_ms(30),
-      bit_rate(32000) {
+      bit_rate(32000),
+      max_bit_rate(-1),
+      max_payload_size_bytes(-1) {
 }
 
 template <typename T>
 bool AudioEncoderDecoderIsacT<T>::Config::IsOk() const {
+  if (max_bit_rate < 32000 && max_bit_rate != -1)
+    return false;
+  if (max_payload_size_bytes < 120 && max_payload_size_bytes != -1)
+    return false;
   switch (sample_rate_hz) {
     case 16000:
+      if (max_bit_rate > 53400)
+        return false;
+      if (max_payload_size_bytes > 400)
+        return false;
       return (frame_size_ms == 30 || frame_size_ms == 60) &&
              bit_rate >= 10000 && bit_rate <= 32000;
     case 32000:
-      return T::has_32kHz &&
+    case 48000:
+      if (max_bit_rate > 160000)
+        return false;
+      if (max_payload_size_bytes > 600)
+        return false;
+      return T::has_swb &&
              (frame_size_ms == 30 && bit_rate >= 10000 && bit_rate <= 56000);
     default:
       return false;
@@ -45,41 +65,76 @@
 template <typename T>
 AudioEncoderDecoderIsacT<T>::ConfigAdaptive::ConfigAdaptive()
     : payload_type(kIsacPayloadType),
+      red_payload_type(kInvalidPayloadType),
       sample_rate_hz(16000),
       initial_frame_size_ms(30),
       initial_bit_rate(32000),
-      enforce_frame_size(false) {
+      max_bit_rate(-1),
+      enforce_frame_size(false),
+      max_payload_size_bytes(-1) {
 }
 
 template <typename T>
 bool AudioEncoderDecoderIsacT<T>::ConfigAdaptive::IsOk() const {
-  static const int max_rate = T::has_32kHz ? 56000 : 32000;
-  return sample_rate_hz == 16000 &&
-         (initial_frame_size_ms == 30 || initial_frame_size_ms == 60) &&
-         initial_bit_rate >= 10000 && initial_bit_rate <= max_rate;
+  if (max_bit_rate < 32000 && max_bit_rate != -1)
+    return false;
+  if (max_payload_size_bytes < 120 && max_payload_size_bytes != -1)
+    return false;
+  switch (sample_rate_hz) {
+    case 16000:
+      if (max_bit_rate > 53400)
+        return false;
+      if (max_payload_size_bytes > 400)
+        return false;
+      return (initial_frame_size_ms == 30 || initial_frame_size_ms == 60) &&
+             initial_bit_rate >= 10000 && initial_bit_rate <= 32000;
+    case 32000:
+    case 48000:
+      if (max_bit_rate > 160000)
+        return false;
+      if (max_payload_size_bytes > 600)
+        return false;
+      return T::has_swb &&
+             (initial_frame_size_ms == 30 && initial_bit_rate >= 10000 &&
+              initial_bit_rate <= 56000);
+    default:
+      return false;
+  }
 }
 
 template <typename T>
 AudioEncoderDecoderIsacT<T>::AudioEncoderDecoderIsacT(const Config& config)
     : payload_type_(config.payload_type),
+      red_payload_type_(config.red_payload_type),
+      state_lock_(CriticalSectionWrapper::CreateCriticalSection()),
       lock_(CriticalSectionWrapper::CreateCriticalSection()),
       packet_in_progress_(false),
-      first_output_frame_(true) {
+      redundant_length_bytes_(0) {
   CHECK(config.IsOk());
   CHECK_EQ(0, T::Create(&isac_state_));
   CHECK_EQ(0, T::EncoderInit(isac_state_, 1));
   CHECK_EQ(0, T::SetEncSampRate(isac_state_, config.sample_rate_hz));
   CHECK_EQ(0, T::Control(isac_state_, config.bit_rate, config.frame_size_ms));
-  CHECK_EQ(0, T::SetDecSampRate(isac_state_, config.sample_rate_hz));
+  // When config.sample_rate_hz is set to 48000 Hz (iSAC-fb), the decoder is
+  // still set to 32000 Hz, since there is no full-band mode in the decoder.
+  CHECK_EQ(0, T::SetDecSampRate(isac_state_,
+                                std::min(config.sample_rate_hz, 32000)));
+  if (config.max_payload_size_bytes != -1)
+    CHECK_EQ(0,
+             T::SetMaxPayloadSize(isac_state_, config.max_payload_size_bytes));
+  if (config.max_bit_rate != -1)
+    CHECK_EQ(0, T::SetMaxRate(isac_state_, config.max_bit_rate));
 }
 
 template <typename T>
 AudioEncoderDecoderIsacT<T>::AudioEncoderDecoderIsacT(
     const ConfigAdaptive& config)
     : payload_type_(config.payload_type),
+      red_payload_type_(config.red_payload_type),
+      state_lock_(CriticalSectionWrapper::CreateCriticalSection()),
       lock_(CriticalSectionWrapper::CreateCriticalSection()),
       packet_in_progress_(false),
-      first_output_frame_(true) {
+      redundant_length_bytes_(0) {
   CHECK(config.IsOk());
   CHECK_EQ(0, T::Create(&isac_state_));
   CHECK_EQ(0, T::EncoderInit(isac_state_, 0));
@@ -88,6 +143,11 @@
                             config.initial_frame_size_ms,
                             config.enforce_frame_size));
   CHECK_EQ(0, T::SetDecSampRate(isac_state_, config.sample_rate_hz));
+  if (config.max_payload_size_bytes != -1)
+    CHECK_EQ(0,
+             T::SetMaxPayloadSize(isac_state_, config.max_payload_size_bytes));
+  if (config.max_bit_rate != -1)
+    CHECK_EQ(0, T::SetMaxRate(isac_state_, config.max_bit_rate));
 }
 
 template <typename T>
@@ -96,8 +156,14 @@
 }
 
 template <typename T>
+void AudioEncoderDecoderIsacT<T>::UpdateDecoderSampleRate(int sample_rate_hz) {
+  CriticalSectionScoped cs(state_lock_.get());
+  CHECK_EQ(0, T::SetDecSampRate(isac_state_, sample_rate_hz));
+}
+
+template <typename T>
 int AudioEncoderDecoderIsacT<T>::sample_rate_hz() const {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   return T::EncSampRate(isac_state_);
 }
 
@@ -108,7 +174,7 @@
 
 template <typename T>
 int AudioEncoderDecoderIsacT<T>::Num10MsFramesInNextPacket() const {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   const int samples_in_next_packet = T::GetNewFrameLen(isac_state_);
   return rtc::CheckedDivExact(samples_in_next_packet,
                               rtc::CheckedDivExact(sample_rate_hz(), 100));
@@ -125,6 +191,7 @@
                                                  size_t max_encoded_bytes,
                                                  uint8_t* encoded,
                                                  EncodedInfo* info) {
+  CriticalSectionScoped cs(lock_.get());
   if (!packet_in_progress_) {
     // Starting a new packet; remember the timestamp for later.
     packet_in_progress_ = true;
@@ -132,7 +199,7 @@
   }
   int r;
   {
-    CriticalSectionScoped cs(lock_.get());
+    CriticalSectionScoped cs(state_lock_.get());
     r = T::Encode(isac_state_, audio, encoded);
   }
   if (r < 0) {
@@ -158,30 +225,40 @@
   if (!T::has_redundant_encoder)
     return true;
 
-  if (first_output_frame_) {
+  if (redundant_length_bytes_ == 0) {
     // Do not emit the first output frame when using redundant encoding.
     info->encoded_bytes = 0;
-    first_output_frame_ = false;
   } else {
-    // Call the encoder's method to get redundant encoding.
+    // When a redundant payload from the last Encode call is available, the
+    // resulting payload consists of the primary encoding followed by the
+    // redundant encoding from last time.
     const size_t primary_length = info->encoded_bytes;
-    int16_t secondary_len;
-    {
-      CriticalSectionScoped cs(lock_.get());
-      secondary_len = T::GetRedPayload(isac_state_, &encoded[primary_length]);
-    }
-    DCHECK_GE(secondary_len, 0);
+    memcpy(&encoded[primary_length], redundant_payload_,
+           redundant_length_bytes_);
+    // The EncodedInfo struct |info| will have one root node and two leaves.
     // |info| will be implicitly cast to an EncodedInfoLeaf struct, effectively
     // discarding the (empty) vector of redundant information. This is
     // intentional.
     info->redundant.push_back(*info);
     EncodedInfoLeaf secondary_info;
     secondary_info.payload_type = info->payload_type;
-    secondary_info.encoded_bytes = secondary_len;
+    secondary_info.encoded_bytes = redundant_length_bytes_;
     secondary_info.encoded_timestamp = last_encoded_timestamp_;
     info->redundant.push_back(secondary_info);
-    info->encoded_bytes += secondary_len;  // Sum of primary and secondary.
+    info->encoded_bytes +=
+        redundant_length_bytes_;  // Sum of primary and secondary.
+    DCHECK_NE(red_payload_type_, kInvalidPayloadType)
+        << "Config.red_payload_type must be set for "
+           "AudioEncoderDecoderIsacRed.";
+    info->payload_type = red_payload_type_;
   }
+  {
+    CriticalSectionScoped cs(state_lock_.get());
+    // Call the encoder's method to get redundant encoding.
+    redundant_length_bytes_ = T::GetRedPayload(isac_state_, redundant_payload_);
+  }
+  DCHECK_LE(redundant_length_bytes_, sizeof(redundant_payload_));
+  DCHECK_GE(redundant_length_bytes_, 0u);
   last_encoded_timestamp_ = packet_timestamp_;
   return true;
 }
@@ -191,7 +268,7 @@
                                         size_t encoded_len,
                                         int16_t* decoded,
                                         SpeechType* speech_type) {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   int16_t temp_type = 1;  // Default is speech.
   int16_t ret =
       T::Decode(isac_state_, encoded, static_cast<int16_t>(encoded_len),
@@ -205,7 +282,7 @@
                                                  size_t encoded_len,
                                                  int16_t* decoded,
                                                  SpeechType* speech_type) {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   int16_t temp_type = 1;  // Default is speech.
   int16_t ret =
       T::DecodeRcu(isac_state_, encoded, static_cast<int16_t>(encoded_len),
@@ -221,13 +298,13 @@
 
 template <typename T>
 int AudioEncoderDecoderIsacT<T>::DecodePlc(int num_frames, int16_t* decoded) {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   return T::DecodePlc(isac_state_, decoded, num_frames);
 }
 
 template <typename T>
 int AudioEncoderDecoderIsacT<T>::Init() {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   return T::DecoderInit(isac_state_);
 }
 
@@ -237,7 +314,7 @@
                                                 uint16_t rtp_sequence_number,
                                                 uint32_t rtp_timestamp,
                                                 uint32_t arrival_timestamp) {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   return T::UpdateBwEstimate(
       isac_state_, payload, static_cast<int32_t>(payload_len),
       rtp_sequence_number, rtp_timestamp, arrival_timestamp);
@@ -245,7 +322,7 @@
 
 template <typename T>
 int AudioEncoderDecoderIsacT<T>::ErrorCode() {
-  CriticalSectionScoped cs(lock_.get());
+  CriticalSectionScoped cs(state_lock_.get());
   return T::GetErrorCode(isac_state_);
 }
 

diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h
index 944152b..4982e5a 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h

@@ -18,7 +18,7 @@
 
 struct IsacFix {
   typedef ISACFIX_MainStruct instance_type;
-  static const bool has_32kHz = false;
+  static const bool has_swb = false;
   static const bool has_redundant_encoder = false;
   static const uint16_t kFixSampleRate = 16000;
   static inline int16_t Control(instance_type* inst,
@@ -105,6 +105,13 @@
     FATAL() << "Should never be called.";
     return -1;
   }
+  static inline int16_t SetMaxPayloadSize(instance_type* inst,
+                                          int16_t max_payload_size_bytes) {
+    return WebRtcIsacfix_SetMaxPayloadSize(inst, max_payload_size_bytes);
+  }
+  static inline int16_t SetMaxRate(instance_type* inst, int32_t max_bit_rate) {
+    return WebRtcIsacfix_SetMaxRate(inst, max_bit_rate);
+  }
 };
 
 typedef AudioEncoderDecoderIsacT<IsacFix> AudioEncoderDecoderIsacFix;

diff --git a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h
index bcfe222..41ed490 100644
--- a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h
+++ b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h

@@ -18,7 +18,7 @@
 
 struct IsacFloat {
   typedef ISACStruct instance_type;
-  static const bool has_32kHz = true;
+  static const bool has_swb = true;
   static const bool has_redundant_encoder = false;
   static inline int16_t Control(instance_type* inst,
                                 int32_t rate,
@@ -102,6 +102,13 @@
     FATAL() << "Should never be called.";
     return -1;
   }
+  static inline int16_t SetMaxPayloadSize(instance_type* inst,
+                                          int16_t max_payload_size_bytes) {
+    return WebRtcIsac_SetMaxPayloadSize(inst, max_payload_size_bytes);
+  }
+  static inline int16_t SetMaxRate(instance_type* inst, int32_t max_bit_rate) {
+    return WebRtcIsac_SetMaxRate(inst, max_bit_rate);
+  }
 };
 
 typedef AudioEncoderDecoderIsacT<IsacFloat> AudioEncoderDecoderIsac;

diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc b/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc
index fb6cadf..7ff1b46 100644
--- a/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc

@@ -22,6 +22,7 @@
 TEST(AudioEncoderIsacRedTest, CompareRedAndNoRed) {
   static const int kSampleRateHz = 16000;
   static const int k10MsSamples = kSampleRateHz / 100;
+  static const int kRedPayloadType = 100;
   // Fill the input array with pseudo-random noise in the range [-1000, 1000].
   int16_t input[k10MsSamples];
   srand(1418811752);
@@ -37,6 +38,9 @@
   AudioEncoderDecoderIsac isac_encoder(config);
   AudioEncoderDecoderIsacRed::Config red_config;
   red_config.sample_rate_hz = kSampleRateHz;
+  red_config.red_payload_type = kRedPayloadType;
+  ASSERT_NE(red_config.red_payload_type, red_config.payload_type)
+      << "iSAC and RED payload types must be different.";
   AudioEncoderDecoderIsacRed isac_red_encoder(red_config);
   AudioEncoder::EncodedInfo info, red_info;
commit	bdebccf384db689931ba3df9afbcd59c85ddb8e2	[log] [tgz]
author	henrik.lundin@webrtc.org <henrik.lundin@webrtc.org>	Thu Jan 29 14:10:32 2015 +0000
committer	henrik.lundin@webrtc.org <henrik.lundin@webrtc.org>	Thu Jan 29 14:11:09 2015 +0000
tree	48e9edd1cd287983494e915d59d617e93abf0aed
parent	18e758526d7cb3c50975b11ed9e04f8f70306a9d [diff]