blob: 8860c7dd2d87b31f0a939f19fd65911a88a1eca2 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/cast/audio_sender/audio_encoder.h"
#include <algorithm>
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/location.h"
#include "base/stl_util.h"
#include "base/sys_byteorder.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/cast/cast_defines.h"
#include "media/cast/cast_environment.h"
#include "third_party/opus/src/include/opus.h"
namespace media {
namespace cast {
namespace {

// The fixed number of audio frames per second and, inversely, the duration of
// one frame's worth of samples.  All supported sampling rates must divide
// evenly by kFramesPerSecond (checked in ImplBase's constructor).
const int kFramesPerSecond = 100;
const int kFrameDurationMillis = 1000 / kFramesPerSecond;  // No remainder!

// Threshold used to decide whether audio being delivered to the encoder is
// coming in too slow with respect to the capture timestamps.  When the gap
// exceeds three frame durations, ImplBase::EncodeAudio() drops buffered data
// and skips the RTP timestamp ahead.
const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;

}  // namespace
// Base class that handles the common problem of feeding one or more AudioBus'
// data into a buffer and then, once the buffer is full, encoding the signal and
// emitting an EncodedFrame via the FrameEncodedCallback.
//
// Subclasses complete the implementation by handling the actual encoding
// details.
class AudioEncoder::ImplBase
    : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
 public:
  // |cast_environment| supplies the thread proxies used to post encoded
  // frames to the MAIN thread.  The configuration is marked invalid unless
  // |sampling_rate| is a positive multiple of kFramesPerSecond and the
  // resulting per-frame sample count (times channels) fits the fixed maximum
  // below.  Subclasses are expected to finish initialization by setting
  // |cast_initialization_status_| to STATUS_AUDIO_INITIALIZED.
  ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
           transport::AudioCodec codec,
           int num_channels,
           int sampling_rate,
           const FrameEncodedCallback& callback)
      : cast_environment_(cast_environment),
        codec_(codec),
        num_channels_(num_channels),
        samples_per_frame_(sampling_rate / kFramesPerSecond),
        callback_(callback),
        cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
        buffer_fill_end_(0),
        frame_id_(0),
        frame_rtp_timestamp_(0) {
    // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
    const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
    if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
        sampling_rate % kFramesPerSecond != 0 ||
        samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
      cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
    }
  }

  // Returns STATUS_AUDIO_INITIALIZED once a subclass has successfully set up
  // its encoder; otherwise the uninitialized or invalid-configuration status.
  CastInitializationStatus InitializationResult() const {
    return cast_initialization_status_;
  }

  // Accumulates |audio_bus| into the internal buffer and, for each complete
  // frame's worth of samples, encodes and emits one EncodedFrame via
  // |callback_| (posted to the MAIN thread).  |recorded_time| is the capture
  // time of the first sample in |audio_bus|.
  void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
                   const base::TimeTicks& recorded_time) {
    DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
    DCHECK(!recorded_time.is_null());

    // Determine whether |recorded_time| is consistent with the amount of audio
    // data having been processed in the past.  Resolve the underrun problem by
    // dropping data from the internal buffer and skipping ahead the next
    // frame's RTP timestamp by the estimated number of frames missed.  On the
    // other hand, don't attempt to resolve overruns: A receiver should
    // gracefully deal with an excess of audio data.
    const base::TimeDelta frame_duration =
        base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
    // Duration represented by the samples currently sitting in the buffer.
    base::TimeDelta buffer_fill_duration =
        buffer_fill_end_ * frame_duration / samples_per_frame_;
    if (!frame_capture_time_.is_null()) {
      // How far ahead of the expected timeline the new data's capture time is.
      const base::TimeDelta amount_ahead_by =
          recorded_time - (frame_capture_time_ + buffer_fill_duration);
      if (amount_ahead_by >
          base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
        // Underrun: discard the partially-filled buffer and advance the RTP
        // timestamp by the number of whole frames that appear to be missing,
        // so the receiver can detect the gap in the signal.
        buffer_fill_end_ = 0;
        buffer_fill_duration = base::TimeDelta();
        const int64 num_frames_missed = amount_ahead_by /
            base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
        frame_rtp_timestamp_ +=
            static_cast<uint32>(num_frames_missed * samples_per_frame_);
        DVLOG(1) << "Skipping RTP timestamp ahead to account for "
                 << num_frames_missed * samples_per_frame_
                 << " samples' worth of underrun.";
      }
    }
    // Rebase the next frame's reference time on the newly-delivered data,
    // backdated by whatever is already buffered.
    frame_capture_time_ = recorded_time - buffer_fill_duration;

    // Encode all audio in |audio_bus| into zero or more frames.
    int src_pos = 0;
    while (src_pos < audio_bus->frames()) {
      // Transfer as many samples as will fit in the frame buffer (or as many
      // as remain in |audio_bus|, whichever is smaller).
      const int num_samples_to_xfer = std::min(
          samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
      DCHECK_EQ(audio_bus->channels(), num_channels_);
      TransferSamplesIntoBuffer(
          audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
      src_pos += num_samples_to_xfer;
      buffer_fill_end_ += num_samples_to_xfer;

      // Wait for more input before encoding a partially-filled buffer.
      if (buffer_fill_end_ < samples_per_frame_)
        break;

      // Audio frames are always keyable: each one stands alone.
      scoped_ptr<transport::EncodedFrame> audio_frame(
          new transport::EncodedFrame());
      audio_frame->dependency = transport::EncodedFrame::KEY;
      audio_frame->frame_id = frame_id_;
      audio_frame->referenced_frame_id = frame_id_;
      audio_frame->rtp_timestamp = frame_rtp_timestamp_;
      audio_frame->reference_time = frame_capture_time_;

      if (EncodeFromFilledBuffer(&audio_frame->data)) {
        cast_environment_->PostTask(
            CastEnvironment::MAIN,
            FROM_HERE,
            base::Bind(callback_, base::Passed(&audio_frame)));
      }

      // Reset the internal buffer, frame ID, and timestamps for the next frame.
      buffer_fill_end_ = 0;
      ++frame_id_;
      frame_rtp_timestamp_ += samples_per_frame_;
      frame_capture_time_ += frame_duration;
    }
  }

 protected:
  friend class base::RefCountedThreadSafe<ImplBase>;
  virtual ~ImplBase() {}

  // Copies |num_samples| samples, starting at |source_offset| in |audio_bus|,
  // into the subclass's staging buffer at |buffer_fill_offset|.
  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) = 0;

  // Encodes one full frame's worth of staged samples into |out|.  Returns
  // false if the frame should not be transmitted.
  virtual bool EncodeFromFilledBuffer(std::string* out) = 0;

  const scoped_refptr<CastEnvironment> cast_environment_;
  const transport::AudioCodec codec_;
  const int num_channels_;
  const int samples_per_frame_;
  const FrameEncodedCallback callback_;

  // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
  CastInitializationStatus cast_initialization_status_;

 private:
  // In the case where a call to EncodeAudio() cannot completely fill the
  // buffer, this points to the position at which to populate data in a later
  // call.
  int buffer_fill_end_;

  // A counter used to label EncodedFrames.
  uint32 frame_id_;

  // The RTP timestamp for the next frame of encoded audio.  This is defined as
  // the number of audio samples encoded so far, plus the estimated number of
  // samples that were missed due to data underruns.  A receiver uses this value
  // to detect gaps in the audio signal data being provided.  Per the spec, RTP
  // timestamp values are allowed to overflow and roll around past zero.
  uint32 frame_rtp_timestamp_;

  // The local system time associated with the start of the next frame of
  // encoded audio.  This value is passed on to a receiver as a reference clock
  // timestamp for the purposes of synchronizing audio and video.  Its
  // progression is expected to drift relative to the elapsed time implied by
  // the RTP timestamps.
  base::TimeTicks frame_capture_time_;

  DISALLOW_COPY_AND_ASSIGN(ImplBase);
};
// Encodes each filled frame buffer with libopus.  The OpusEncoder state lives
// in a self-managed byte array sized by opus_encoder_get_size().
class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 public:
  // A non-positive |bitrate| selects the library's automatic bitrate mode.
  OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
           int num_channels,
           int sampling_rate,
           int bitrate,
           const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 transport::kOpus,
                 num_channels,
                 sampling_rate,
                 callback),
        encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
        opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
        buffer_(new float[num_channels * samples_per_frame_]) {
    // ImplBase may already have rejected the configuration; don't touch the
    // status in that case.
    if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
      return;
    const int init_result = opus_encoder_init(
        opus_encoder_, sampling_rate, num_channels, OPUS_APPLICATION_AUDIO);
    if (init_result != OPUS_OK) {
      ImplBase::cast_initialization_status_ =
          STATUS_INVALID_AUDIO_CONFIGURATION;
      return;
    }
    ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;

    // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
    // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
    // frame size.  The opus library authors may, of course, adjust this in
    // later versions.
    const opus_int32 target_bitrate = (bitrate <= 0) ? OPUS_AUTO : bitrate;
    CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(target_bitrate)),
             OPUS_OK);
  }

 private:
  virtual ~OpusImpl() {}

  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) OVERRIDE {
    // Opus requires channel-interleaved samples in a single array.
    float* const interleaved = buffer_.get();
    for (int ch = 0; ch < audio_bus->channels(); ++ch) {
      const float* const channel_data = audio_bus->channel(ch);
      for (int i = 0; i < num_samples; ++i) {
        interleaved[(buffer_fill_offset + i) * num_channels_ + ch] =
            channel_data[source_offset + i];
      }
    }
  }

  virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
    out->resize(kOpusMaxPayloadSize);
    const opus_int32 result =
        opus_encode_float(opus_encoder_,
                          buffer_.get(),
                          samples_per_frame_,
                          reinterpret_cast<uint8*>(string_as_array(out)),
                          kOpusMaxPayloadSize);
    if (result < 0) {
      LOG(ERROR) << "Error code from opus_encode_float(): " << result;
      return false;
    }
    if (result <= 1) {
      // Do nothing: The documentation says that a return value of zero or
      // one byte means the packet does not need to be transmitted.
      return false;
    }
    out->resize(result);
    return true;
  }

  const scoped_ptr<uint8[]> encoder_memory_;
  OpusEncoder* const opus_encoder_;
  const scoped_ptr<float[]> buffer_;

  // This is the recommended value, according to documentation in
  // third_party/opus/src/include/opus.h, so that the Opus encoder does not
  // degrade the audio due to memory constraints.
  //
  // Note: Whereas other RTP implementations do not, the cast library is
  // perfectly capable of transporting larger than MTU-sized audio frames.
  static const int kOpusMaxPayloadSize = 4000;

  DISALLOW_COPY_AND_ASSIGN(OpusImpl);
};
// "Encodes" by passing 16-bit PCM straight through, converted to big-endian
// (network) byte order.  No compression is performed.
class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 public:
  Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
            int num_channels,
            int sampling_rate,
            const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 transport::kPcm16,
                 num_channels,
                 sampling_rate,
                 callback),
        buffer_(new int16[num_channels * samples_per_frame_]) {
    // ImplBase may already have rejected the configuration; don't touch the
    // status in that case.
    if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
      return;
    // There is no codec state to set up, so initialization always succeeds.
    cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
  }

 private:
  virtual ~Pcm16Impl() {}

  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) OVERRIDE {
    // Converts the float planar data to interleaved int16 in one call.
    audio_bus->ToInterleavedPartial(
        source_offset,
        num_samples,
        sizeof(int16),
        buffer_.get() + buffer_fill_offset * num_channels_);
  }

  virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
    // Output 16-bit PCM integers in big-endian byte order.
    out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
    const int16* src = buffer_.get();
    const int16* const src_end = src + num_channels_ * samples_per_frame_;
    uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
    for (; src < src_end; ++src, ++dest)
      *dest = base::HostToNet16(*src);
    return true;
  }

  // Staging buffer holding one frame of interleaved int16 samples.
  const scoped_ptr<int16[]> buffer_;

  DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
};
AudioEncoder::AudioEncoder(
    const scoped_refptr<CastEnvironment>& cast_environment,
    const AudioSenderConfig& audio_config,
    const FrameEncodedCallback& frame_encoded_callback)
    : cast_environment_(cast_environment) {
  // Note: It doesn't matter which thread constructs AudioEncoder, just so long
  // as all calls to InsertAudio() are by the same thread.
  insert_thread_checker_.DetachFromThread();

  // Instantiate the codec-specific implementation; |impl_| stays NULL for an
  // unrecognized codec, which InitializationResult() reports to the caller.
  if (audio_config.codec == transport::kOpus) {
    impl_ = new OpusImpl(cast_environment,
                         audio_config.channels,
                         audio_config.frequency,
                         audio_config.bitrate,
                         frame_encoded_callback);
  } else if (audio_config.codec == transport::kPcm16) {
    impl_ = new Pcm16Impl(cast_environment,
                          audio_config.channels,
                          audio_config.frequency,
                          frame_encoded_callback);
  } else {
    NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
  }
}
// Note: |impl_| is ref-counted; pending EncodeAudio() tasks bound via
// base::Bind() hold their own reference, so the impl may outlive |this|.
AudioEncoder::~AudioEncoder() {}
CastInitializationStatus AudioEncoder::InitializationResult() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  // A NULL |impl_| means the constructor saw an unrecognized codec.
  return impl_ ? impl_->InitializationResult()
               : STATUS_UNSUPPORTED_AUDIO_CODEC;
}
void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
                               const base::TimeTicks& recorded_time) {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  DCHECK(audio_bus.get());
  if (!impl_) {
    NOTREACHED();
    return;
  }
  // Hand the data off to the AUDIO thread; the bound |impl_| reference keeps
  // the implementation alive until the task has run.
  const base::Closure encode_task =
      base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
                 impl_,
                 base::Passed(&audio_bus),
                 recorded_time);
  cast_environment_->PostTask(CastEnvironment::AUDIO, FROM_HERE, encode_task);
}
} // namespace cast
} // namespace media