| // Copyright 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "media/cast/audio_sender/audio_encoder.h" |
| |
| #include <algorithm> |
| |
| #include "base/bind.h" |
| #include "base/bind_helpers.h" |
| #include "base/location.h" |
| #include "base/stl_util.h" |
| #include "base/sys_byteorder.h" |
| #include "base/time/time.h" |
| #include "media/base/audio_bus.h" |
| #include "media/cast/cast_defines.h" |
| #include "media/cast/cast_environment.h" |
| #include "third_party/opus/src/include/opus.h" |
| |
| namespace media { |
| namespace cast { |
| |
namespace {

// The fixed number of audio frames per second and, inversely, the duration of
// one frame's worth of samples.  With kFramesPerSecond == 100, each encoded
// frame carries 10 ms of audio.
const int kFramesPerSecond = 100;
const int kFrameDurationMillis = 1000 / kFramesPerSecond;  // No remainder!

// Threshold used to decide whether audio being delivered to the encoder is
// coming in too slow with respect to the capture timestamps.  Three frames'
// worth (30 ms) of lag triggers the underrun-recovery path in EncodeAudio().
const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;

}  // namespace
| |
| |
| // Base class that handles the common problem of feeding one or more AudioBus' |
| // data into a buffer and then, once the buffer is full, encoding the signal and |
| // emitting an EncodedFrame via the FrameEncodedCallback. |
| // |
| // Subclasses complete the implementation by handling the actual encoding |
| // details. |
class AudioEncoder::ImplBase
    : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
 public:
  // |callback| is run on the MAIN thread once per fully-encoded frame.
  // This ctor only validates the configuration; the subclass ctor is
  // responsible for advancing |cast_initialization_status_| from
  // STATUS_AUDIO_UNINITIALIZED to STATUS_AUDIO_INITIALIZED on success.
  ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
           transport::AudioCodec codec,
           int num_channels,
           int sampling_rate,
           const FrameEncodedCallback& callback)
      : cast_environment_(cast_environment),
        codec_(codec),
        num_channels_(num_channels),
        samples_per_frame_(sampling_rate / kFramesPerSecond),
        callback_(callback),
        cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
        buffer_fill_end_(0),
        frame_id_(0),
        frame_rtp_timestamp_(0) {
    // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
    const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
    // Reject non-positive channel/frame sizes, sampling rates that do not
    // divide evenly into whole frames, and frames larger than the supported
    // maximum buffer size.
    if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
        sampling_rate % kFramesPerSecond != 0 ||
        samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
      cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
    }
  }

  // Returns STATUS_AUDIO_INITIALIZED on success, or a failure status set by
  // this ctor or the subclass ctor.
  CastInitializationStatus InitializationResult() const {
    return cast_initialization_status_;
  }

  // Buffers |audio_bus| data, emitting one EncodedFrame (via |callback_| on
  // the MAIN thread) each time a full frame's worth of samples accumulates.
  // Must only be called after successful initialization.
  void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
                   const base::TimeTicks& recorded_time) {
    DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
    DCHECK(!recorded_time.is_null());

    // Determine whether |recorded_time| is consistent with the amount of audio
    // data having been processed in the past. Resolve the underrun problem by
    // dropping data from the internal buffer and skipping ahead the next
    // frame's RTP timestamp by the estimated number of frames missed. On the
    // other hand, don't attempt to resolve overruns: A receiver should
    // gracefully deal with an excess of audio data.
    const base::TimeDelta frame_duration =
        base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
    // How much capture time the partially-filled buffer already represents.
    base::TimeDelta buffer_fill_duration =
        buffer_fill_end_ * frame_duration / samples_per_frame_;
    if (!frame_capture_time_.is_null()) {
      // Positive |amount_ahead_by| means the new data arrived later than the
      // prior data's extent predicted, i.e. samples were dropped upstream.
      const base::TimeDelta amount_ahead_by =
          recorded_time - (frame_capture_time_ + buffer_fill_duration);
      if (amount_ahead_by >
              base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
        // Discard the stale partial buffer and advance the RTP timestamp by
        // the estimated number of whole frames missed, so the receiver can
        // detect the gap in the signal.
        buffer_fill_end_ = 0;
        buffer_fill_duration = base::TimeDelta();
        const int64 num_frames_missed = amount_ahead_by /
            base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
        frame_rtp_timestamp_ +=
            static_cast<uint32>(num_frames_missed * samples_per_frame_);
        DVLOG(1) << "Skipping RTP timestamp ahead to account for "
                 << num_frames_missed * samples_per_frame_
                 << " samples' worth of underrun.";
      }
    }
    // Anchor the next frame's reference time at the capture time of the first
    // sample currently in the buffer.
    frame_capture_time_ = recorded_time - buffer_fill_duration;

    // Encode all audio in |audio_bus| into zero or more frames.
    int src_pos = 0;
    while (src_pos < audio_bus->frames()) {
      // Transfer as many samples as will fit in the remaining buffer space,
      // bounded by how many remain in |audio_bus|.
      const int num_samples_to_xfer = std::min(
          samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
      DCHECK_EQ(audio_bus->channels(), num_channels_);
      TransferSamplesIntoBuffer(
          audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
      src_pos += num_samples_to_xfer;
      buffer_fill_end_ += num_samples_to_xfer;

      // Not enough data for a full frame yet; wait for the next call.
      if (buffer_fill_end_ < samples_per_frame_)
        break;

      scoped_ptr<transport::EncodedFrame> audio_frame(
          new transport::EncodedFrame());
      // Audio frames are self-contained: each is a "key" frame that
      // references only itself.
      audio_frame->dependency = transport::EncodedFrame::KEY;
      audio_frame->frame_id = frame_id_;
      audio_frame->referenced_frame_id = frame_id_;
      audio_frame->rtp_timestamp = frame_rtp_timestamp_;
      audio_frame->reference_time = frame_capture_time_;

      // A false return (e.g., codec decided the frame need not be sent) just
      // skips emission; bookkeeping below still advances.
      if (EncodeFromFilledBuffer(&audio_frame->data)) {
        cast_environment_->PostTask(
            CastEnvironment::MAIN,
            FROM_HERE,
            base::Bind(callback_, base::Passed(&audio_frame)));
      }

      // Reset the internal buffer, frame ID, and timestamps for the next frame.
      buffer_fill_end_ = 0;
      ++frame_id_;
      frame_rtp_timestamp_ += samples_per_frame_;
      frame_capture_time_ += frame_duration;
    }
  }

 protected:
  friend class base::RefCountedThreadSafe<ImplBase>;
  virtual ~ImplBase() {}

  // Copies |num_samples| samples per channel from |audio_bus| (starting at
  // |source_offset|) into the subclass' staging buffer at
  // |buffer_fill_offset|, in whatever layout the codec requires.
  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) = 0;
  // Encodes one full frame from the staging buffer into |out|.  Returns false
  // if nothing should be transmitted for this frame.
  virtual bool EncodeFromFilledBuffer(std::string* out) = 0;

  const scoped_refptr<CastEnvironment> cast_environment_;
  const transport::AudioCodec codec_;
  const int num_channels_;
  const int samples_per_frame_;
  const FrameEncodedCallback callback_;

  // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
  CastInitializationStatus cast_initialization_status_;

 private:
  // In the case where a call to EncodeAudio() cannot completely fill the
  // buffer, this points to the position at which to populate data in a later
  // call.
  int buffer_fill_end_;

  // A counter used to label EncodedFrames.
  uint32 frame_id_;

  // The RTP timestamp for the next frame of encoded audio.  This is defined as
  // the number of audio samples encoded so far, plus the estimated number of
  // samples that were missed due to data underruns.  A receiver uses this value
  // to detect gaps in the audio signal data being provided.  Per the spec, RTP
  // timestamp values are allowed to overflow and roll around past zero.
  uint32 frame_rtp_timestamp_;

  // The local system time associated with the start of the next frame of
  // encoded audio.  This value is passed on to a receiver as a reference clock
  // timestamp for the purposes of synchronizing audio and video.  Its
  // progression is expected to drift relative to the elapsed time implied by
  // the RTP timestamps.
  base::TimeTicks frame_capture_time_;

  DISALLOW_COPY_AND_ASSIGN(ImplBase);
};
| |
// Opus-based implementation.  Stages float samples in channel-interleaved
// form and emits one Opus packet per frame.
class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 public:
  // |bitrate| <= 0 selects the encoder's automatic (variable) bitrate mode.
  OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
           int num_channels,
           int sampling_rate,
           int bitrate,
           const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 transport::kOpus,
                 num_channels,
                 sampling_rate,
                 callback),
        // The encoder state is allocated as raw bytes sized by the library,
        // then used through the |opus_encoder_| typed alias.
        encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
        opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
        buffer_(new float[num_channels * samples_per_frame_]) {
    // Bail out if the base class already rejected the configuration.
    if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
      return;
    if (opus_encoder_init(opus_encoder_,
                          sampling_rate,
                          num_channels,
                          OPUS_APPLICATION_AUDIO) != OPUS_OK) {
      ImplBase::cast_initialization_status_ =
          STATUS_INVALID_AUDIO_CONFIGURATION;
      return;
    }
    ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;

    if (bitrate <= 0) {
      // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
      // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
      // frame size.  The opus library authors may, of course, adjust this in
      // later versions.
      bitrate = OPUS_AUTO;
    }
    // Setting the bitrate should never fail once init succeeded; crash early
    // if it does.
    CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
             OPUS_OK);
  }

 private:
  virtual ~OpusImpl() {}

  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) OVERRIDE {
    // Opus requires channel-interleaved samples in a single array.
    for (int ch = 0; ch < audio_bus->channels(); ++ch) {
      const float* src = audio_bus->channel(ch) + source_offset;
      const float* const src_end = src + num_samples;
      float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
      // Stride by |num_channels_| in the destination to interleave.
      for (; src < src_end; ++src, dest += num_channels_)
        *dest = *src;
    }
  }

  virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
    // Reserve the maximum payload size up front; shrink to the actual packet
    // size on success.
    out->resize(kOpusMaxPayloadSize);
    const opus_int32 result =
        opus_encode_float(opus_encoder_,
                          buffer_.get(),
                          samples_per_frame_,
                          reinterpret_cast<uint8*>(string_as_array(out)),
                          kOpusMaxPayloadSize);
    if (result > 1) {
      out->resize(result);
      return true;
    } else if (result < 0) {
      LOG(ERROR) << "Error code from opus_encode_float(): " << result;
      return false;
    } else {
      // Do nothing: The documentation says that a return value of zero or
      // one byte means the packet does not need to be transmitted.
      return false;
    }
  }

  // Raw storage backing |opus_encoder_|; must be declared (and thus
  // initialized) before |opus_encoder_|.
  const scoped_ptr<uint8[]> encoder_memory_;
  OpusEncoder* const opus_encoder_;
  // Channel-interleaved staging buffer holding one frame's worth of samples.
  const scoped_ptr<float[]> buffer_;

  // This is the recommended value, according to documentation in
  // third_party/opus/src/include/opus.h, so that the Opus encoder does not
  // degrade the audio due to memory constraints.
  //
  // Note: Whereas other RTP implementations do not, the cast library is
  // perfectly capable of transporting larger than MTU-sized audio frames.
  static const int kOpusMaxPayloadSize = 4000;

  DISALLOW_COPY_AND_ASSIGN(OpusImpl);
};
| |
| class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase { |
| public: |
| Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment, |
| int num_channels, |
| int sampling_rate, |
| const FrameEncodedCallback& callback) |
| : ImplBase(cast_environment, |
| transport::kPcm16, |
| num_channels, |
| sampling_rate, |
| callback), |
| buffer_(new int16[num_channels * samples_per_frame_]) { |
| if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) |
| return; |
| cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; |
| } |
| |
| private: |
| virtual ~Pcm16Impl() {} |
| |
| virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, |
| int source_offset, |
| int buffer_fill_offset, |
| int num_samples) OVERRIDE { |
| audio_bus->ToInterleavedPartial( |
| source_offset, |
| num_samples, |
| sizeof(int16), |
| buffer_.get() + buffer_fill_offset * num_channels_); |
| } |
| |
| virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE { |
| // Output 16-bit PCM integers in big-endian byte order. |
| out->resize(num_channels_ * samples_per_frame_ * sizeof(int16)); |
| const int16* src = buffer_.get(); |
| const int16* const src_end = src + num_channels_ * samples_per_frame_; |
| uint16* dest = reinterpret_cast<uint16*>(&out->at(0)); |
| for (; src < src_end; ++src, ++dest) |
| *dest = base::HostToNet16(*src); |
| return true; |
| } |
| |
| private: |
| const scoped_ptr<int16[]> buffer_; |
| |
| DISALLOW_COPY_AND_ASSIGN(Pcm16Impl); |
| }; |
| |
| AudioEncoder::AudioEncoder( |
| const scoped_refptr<CastEnvironment>& cast_environment, |
| const AudioSenderConfig& audio_config, |
| const FrameEncodedCallback& frame_encoded_callback) |
| : cast_environment_(cast_environment) { |
| // Note: It doesn't matter which thread constructs AudioEncoder, just so long |
| // as all calls to InsertAudio() are by the same thread. |
| insert_thread_checker_.DetachFromThread(); |
| switch (audio_config.codec) { |
| case transport::kOpus: |
| impl_ = new OpusImpl(cast_environment, |
| audio_config.channels, |
| audio_config.frequency, |
| audio_config.bitrate, |
| frame_encoded_callback); |
| break; |
| case transport::kPcm16: |
| impl_ = new Pcm16Impl(cast_environment, |
| audio_config.channels, |
| audio_config.frequency, |
| frame_encoded_callback); |
| break; |
| default: |
| NOTREACHED() << "Unsupported or unspecified codec for audio encoder"; |
| break; |
| } |
| } |
| |
// Note: |impl_| is ref-counted, so it may briefly outlive this object if a
// task posted to the AUDIO thread still holds a reference.
AudioEncoder::~AudioEncoder() {}
| |
| CastInitializationStatus AudioEncoder::InitializationResult() const { |
| DCHECK(insert_thread_checker_.CalledOnValidThread()); |
| if (impl_) { |
| return impl_->InitializationResult(); |
| } |
| return STATUS_UNSUPPORTED_AUDIO_CODEC; |
| } |
| |
// Queues |audio_bus| for encoding on the AUDIO thread.  |recorded_time| is
// the capture timestamp of the first sample in |audio_bus|.  Must always be
// called from the same thread (enforced by |insert_thread_checker_|).
void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
                               const base::TimeTicks& recorded_time) {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  DCHECK(audio_bus.get());
  if (!impl_) {
    // Construction failed to create an implementation (unsupported codec).
    NOTREACHED();
    return;
  }
  // Hop to the AUDIO thread for the actual encoding work.  Ownership of
  // |audio_bus| is transferred into the bound task via base::Passed(), and
  // the scoped_refptr |impl_| keeps the implementation alive until it runs.
  cast_environment_->PostTask(CastEnvironment::AUDIO,
                              FROM_HERE,
                              base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
                                         impl_,
                                         base::Passed(&audio_bus),
                                         recorded_time));
}
| |
| } // namespace cast |
| } // namespace media |