blob: 1e66736c2b85c54383edf6758dc21dd9502ef81b [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h"
#include <algorithm>
#include "base/logging.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/buffers.h"
#include "media/base/data_buffer.h"
#include "media/base/limits.h"
#include "media/ffmpeg/ffmpeg_common.h"
// Include FFmpeg header files.
extern "C" {
// Temporarily disable possible loss of data warning.
MSVC_PUSH_DISABLE_WARNING(4244);
#include <libavcodec/avcodec.h>
MSVC_POP_WARNING();
} // extern "C"
namespace media {
// Maximum number of channels with defined layout in src/media.
// Used by IsValidConfig() to reject configs we cannot represent.
static const int kMaxChannels = 8;
// Maps a cdm::AudioDecoderConfig codec enum onto the matching FFmpeg codec
// ID. Unknown or unsupported codecs trip NOTREACHED() and map to
// AV_CODEC_ID_NONE.
static AVCodecID CdmAudioCodecToCodecID(
    cdm::AudioDecoderConfig::AudioCodec audio_codec) {
  if (audio_codec == cdm::AudioDecoderConfig::kCodecVorbis)
    return AV_CODEC_ID_VORBIS;
  if (audio_codec == cdm::AudioDecoderConfig::kCodecAac)
    return AV_CODEC_ID_AAC;
  NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
  return AV_CODEC_ID_NONE;
}
// Translates |config| into the fields of an FFmpeg AVCodecContext so the
// decoder can be opened. Any codec extra data is copied into FFmpeg-owned
// storage with the zeroed padding FFmpeg requires after the payload.
static void CdmAudioDecoderConfigToAVCodecContext(
    const cdm::AudioDecoderConfig& config,
    AVCodecContext* codec_context) {
  codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
  codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);

  // Choose the sample format implied by the configured bit depth.
  if (config.bits_per_channel == 8) {
    codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
  } else if (config.bits_per_channel == 16) {
    codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
  } else if (config.bits_per_channel == 32) {
    codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
  } else {
    DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
                "per channel: " << config.bits_per_channel;
    codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
  }

  codec_context->channels = config.channel_count;
  codec_context->sample_rate = config.samples_per_second;

  if (!config.extra_data) {
    codec_context->extradata = NULL;
    codec_context->extradata_size = 0;
    return;
  }

  // Copy the extra data into an av_malloc'd buffer with zeroed padding, as
  // FFmpeg may over-read past extradata_size.
  codec_context->extradata_size = config.extra_data_size;
  codec_context->extradata = reinterpret_cast<uint8_t*>(
      av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
  memcpy(codec_context->extradata, config.extra_data, config.extra_data_size);
  memset(codec_context->extradata + config.extra_data_size, '\0',
         FF_INPUT_BUFFER_PADDING_SIZE);
}
// Constructs an uninitialized decoder bound to |host|, which is later used
// to allocate output frame buffers. Initialize() must succeed before
// DecodeBuffer() may be called.
FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host)
: is_initialized_(false),
host_(host),
bits_per_channel_(0),
samples_per_second_(0),
channels_(0),
av_sample_format_(0),
bytes_per_frame_(0),
last_input_timestamp_(kNoTimestamp()),  // No input buffer seen yet.
output_bytes_to_drop_(0) {
}
// Releases the FFmpeg codec context and frame, if any were allocated.
FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
ReleaseFFmpegResources();
}
// Sets up the FFmpeg decoder for |config|. Returns false on an invalid
// config, double-initialization, or FFmpeg failure. The steps below are
// order-sensitive: allocate context -> fill from config -> adjust requested
// sample format -> open codec -> verify format -> cache per-stream state.
bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
DVLOG(1) << "Initialize()";
if (!IsValidConfig(config)) {
LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
return false;
}
if (is_initialized_) {
LOG(ERROR) << "Initialize(): Already initialized.";
return false;
}
// Initialize AVCodecContext structure.
codec_context_.reset(avcodec_alloc_context3(NULL));
CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get());
// MP3 decodes to S16P which we don't support, tell it to use S16 instead.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
DLOG(ERROR) << "Could not initialize audio decoder: "
<< codec_context_->codec_id;
return false;
}
// Ensure avcodec_open2() respected our format request.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
DLOG(ERROR) << "Unable to configure a supported sample format: "
<< codec_context_->sample_fmt;
return false;
}
// Some codecs will only output float data, so we need to convert to integer
// before returning the decoded buffer.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP ||
codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
// Preallocate the AudioBus for float conversions. We can treat interleaved
// float data as a single planar channel since our output is expected in an
// interleaved format anyways.
int channels = codec_context_->channels;
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT)
channels = 1;
converter_bus_ = AudioBus::CreateWrapper(channels);
}
// Success!
av_frame_.reset(avcodec_alloc_frame());
bits_per_channel_ = config.bits_per_channel;
samples_per_second_ = config.samples_per_second;
// Output frame size in bytes: all channels at the configured bit depth.
bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8;
output_timestamp_helper_.reset(
new AudioTimestampHelper(config.samples_per_second));
// Reserve roughly one second of output to limit reallocation churn.
serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_);
is_initialized_ = true;
// Store initial values to guard against midstream configuration changes.
channels_ = codec_context_->channels;
av_sample_format_ = codec_context_->sample_fmt;
return true;
}
// Tears the decoder down: frees all FFmpeg state, clears the timestamp
// bookkeeping, and marks the decoder as requiring Initialize() again.
void FFmpegCdmAudioDecoder::Deinitialize() {
  DVLOG(1) << "Deinitialize()";
  ReleaseFFmpegResources();
  ResetTimestampState();
  is_initialized_ = false;
}
// Discards buffered decoder state (e.g. after a seek) without releasing the
// codec; the next buffer establishes a fresh output timeline.
void FFmpegCdmAudioDecoder::Reset() {
  DVLOG(1) << "Reset()";
  ResetTimestampState();
  avcodec_flush_buffers(codec_context_.get());
}
// static
// Returns true iff |config| names a known codec and every numeric parameter
// lies within the limits supported by src/media.
bool FFmpegCdmAudioDecoder::IsValidConfig(
    const cdm::AudioDecoderConfig& config) {
  if (config.codec == cdm::AudioDecoderConfig::kUnknownAudioCodec)
    return false;
  if (config.channel_count <= 0 || config.channel_count > kMaxChannels)
    return false;
  if (config.bits_per_channel <= 0 ||
      config.bits_per_channel > limits::kMaxBitsPerSample)
    return false;
  return config.samples_per_second > 0 &&
         config.samples_per_second <= limits::kMaxSampleRate;
}
// Decodes |compressed_buffer| (NULL signals end-of-stream) and serializes any
// produced PCM into |decoded_frames| as a sequence of
// (int64 timestamp us, int64 size, data bytes) records.
//
// Returns kSuccess when at least one frame was output, kNeedMoreData when the
// decoder needs more input before producing output, and kDecodeError on
// non-monotonic timestamps, decode failure, midstream config changes, or
// host allocation failure.
//
// Fix vs. previous revision: the decode-error DLOG printed the timestamp a
// second time mislabeled as "duration" (no duration exists in this scope);
// the bogus field has been removed.
cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
    const uint8_t* compressed_buffer,
    int32_t compressed_buffer_size,
    int64_t input_timestamp,
    cdm::AudioFrames* decoded_frames) {
  DVLOG(1) << "DecodeBuffer()";
  const bool is_end_of_stream = !compressed_buffer;
  base::TimeDelta timestamp =
      base::TimeDelta::FromMicroseconds(input_timestamp);

  bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
  if (!is_end_of_stream) {
    if (last_input_timestamp_ == kNoTimestamp()) {
      if (is_vorbis && timestamp < base::TimeDelta()) {
        // Dropping frames for negative timestamps as outlined in section A.2
        // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
        int frames_to_drop = floor(
            0.5 + -timestamp.InSecondsF() * samples_per_second_);
        output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
      } else {
        last_input_timestamp_ = timestamp;
      }
    } else if (timestamp != kNoTimestamp()) {
      // Inputs must arrive in presentation order.
      if (timestamp < last_input_timestamp_) {
        base::TimeDelta diff = timestamp - last_input_timestamp_;
        DVLOG(1) << "Input timestamps are not monotonically increasing! "
                 << " ts " << timestamp.InMicroseconds() << " us"
                 << " diff " << diff.InMicroseconds() << " us";
        return cdm::kDecodeError;
      }
      last_input_timestamp_ = timestamp;
    }
  }

  AVPacket packet;
  av_init_packet(&packet);
  packet.data = const_cast<uint8_t*>(compressed_buffer);
  packet.size = compressed_buffer_size;

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet. Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    // Reset frame to default values.
    avcodec_get_frame_defaults(av_frame_.get());

    int frame_decoded = 0;
    int result = avcodec_decode_audio4(
        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);

    if (result < 0) {
      DCHECK(!is_end_of_stream)
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";
      DLOG(ERROR) << "Error decoding an audio frame with timestamp: "
                  << timestamp.InMicroseconds() << " us, packet size: "
                  << compressed_buffer_size << " bytes";
      return cdm::kDecodeError;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    // Establish the output timeline from the first real input buffer.
    if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
        !is_end_of_stream) {
      DCHECK(timestamp != kNoTimestamp());
      if (output_bytes_to_drop_ > 0) {
        // Currently Vorbis is the only codec that causes us to drop samples.
        // If we have to drop samples it always means the timeline starts at 0.
        DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
        output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
      } else {
        output_timestamp_helper_->SetBaseTimestamp(timestamp);
      }
    }

    int decoded_audio_size = 0;
    if (frame_decoded) {
      // Reject midstream configuration changes; downstream state (frame
      // size, converter bus) was sized for the initial configuration.
      if (av_frame_->sample_rate != samples_per_second_ ||
          av_frame_->channels != channels_ ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << samples_per_second_
                    << ", Channels: " << av_frame_->channels << " vs "
                    << channels_
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;
        return cdm::kDecodeError;
      }

      decoded_audio_size = av_samples_get_buffer_size(
          NULL, codec_context_->channels, av_frame_->nb_samples,
          codec_context_->sample_fmt, 1);
      // If we're decoding into float, adjust audio size.
      if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) {
        DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT ||
               codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
        decoded_audio_size *=
            static_cast<float>(bits_per_channel_ / 8) / sizeof(float);
      }
    }

    // Consume any samples still owed from a negative starting timestamp.
    int start_sample = 0;
    if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
          << "Decoder didn't output full frames";

      int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
      start_sample = dropped_size / bytes_per_frame_;
      decoded_audio_size -= dropped_size;
      output_bytes_to_drop_ -= dropped_size;
    }

    scoped_refptr<DataBuffer> output;
    if (decoded_audio_size > 0) {
      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
          << "Decoder didn't output full frames";

      // Convert float data using an AudioBus.
      if (converter_bus_) {
        // Setup the AudioBus as a wrapper of the AVFrame data and then use
        // AudioBus::ToInterleaved() to convert the data as necessary.
        int skip_frames = start_sample;
        int total_frames = av_frame_->nb_samples;
        int frames_to_interleave = decoded_audio_size / bytes_per_frame_;
        if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
          // Interleaved float is treated as one planar channel, so scale the
          // frame counts by the channel count.
          DCHECK_EQ(converter_bus_->channels(), 1);
          total_frames *= codec_context_->channels;
          skip_frames *= codec_context_->channels;
          frames_to_interleave *= codec_context_->channels;
        }

        converter_bus_->set_frames(total_frames);
        for (int i = 0; i < converter_bus_->channels(); ++i) {
          converter_bus_->SetChannelData(i, reinterpret_cast<float*>(
              av_frame_->extended_data[i]));
        }

        output = new DataBuffer(decoded_audio_size);
        output->set_data_size(decoded_audio_size);

        DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames);
        converter_bus_->ToInterleavedPartial(
            skip_frames, frames_to_interleave, bits_per_channel_ / 8,
            output->writable_data());
      } else {
        output = DataBuffer::CopyFrom(
            av_frame_->extended_data[0] + start_sample * bytes_per_frame_,
            decoded_audio_size);
      }

      base::TimeDelta output_timestamp =
          output_timestamp_helper_->GetTimestamp();
      output_timestamp_helper_->AddFrames(decoded_audio_size /
                                          bytes_per_frame_);

      // Serialize the audio samples into |serialized_audio_frames_|.
      SerializeInt64(output_timestamp.InMicroseconds());
      SerializeInt64(output->data_size());
      serialized_audio_frames_.insert(
          serialized_audio_frames_.end(),
          output->data(),
          output->data() + output->data_size());
    }
  } while (packet.size > 0);

  if (!serialized_audio_frames_.empty()) {
    // Hand the serialized frames to the host in a single allocation.
    decoded_frames->SetFrameBuffer(
        host_->Allocate(serialized_audio_frames_.size()));
    if (!decoded_frames->FrameBuffer()) {
      LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed.";
      return cdm::kDecodeError;
    }
    memcpy(decoded_frames->FrameBuffer()->Data(),
           &serialized_audio_frames_[0],
           serialized_audio_frames_.size());
    decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
    serialized_audio_frames_.clear();

    return cdm::kSuccess;
  }

  return cdm::kNeedMoreData;
}
// Clears all timestamp bookkeeping so the next input buffer establishes a
// brand new output timeline.
void FFmpegCdmAudioDecoder::ResetTimestampState() {
  output_bytes_to_drop_ = 0;
  last_input_timestamp_ = kNoTimestamp();
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
}
// Frees the FFmpeg codec context and decode frame by resetting their
// scoped holders; safe to call when nothing was allocated.
void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
DVLOG(1) << "ReleaseFFmpegResources()";
codec_context_.reset();
av_frame_.reset();
}
// Appends the raw (host-endian) bytes of |value| to
// |serialized_audio_frames_|, used to emit the timestamp/size headers that
// precede each serialized audio frame.
void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
  const uint8_t* value_bytes = reinterpret_cast<const uint8_t*>(&value);
  serialized_audio_frames_.insert(serialized_audio_frames_.end(),
                                  value_bytes, value_bytes + sizeof(value));
}
} // namespace media