| // Copyright 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h" |
| |
| #include <algorithm> |
| |
| #include "base/logging.h" |
| #include "media/base/audio_bus.h" |
| #include "media/base/audio_timestamp_helper.h" |
| #include "media/base/buffers.h" |
| #include "media/base/data_buffer.h" |
| #include "media/base/limits.h" |
| #include "media/ffmpeg/ffmpeg_common.h" |
| |
| // Include FFmpeg header files. |
| extern "C" { |
| // Temporarily disable possible loss of data warning. |
| MSVC_PUSH_DISABLE_WARNING(4244); |
| #include <libavcodec/avcodec.h> |
| MSVC_POP_WARNING(); |
| } // extern "C" |
| |
| namespace media { |
| |
// Largest channel count that has a defined layout in src/media. Used as the
// upper bound when a decoder configuration is validated.
namespace {
const int kMaxChannels = 8;
}  // namespace
| |
| static AVCodecID CdmAudioCodecToCodecID( |
| cdm::AudioDecoderConfig::AudioCodec audio_codec) { |
| switch (audio_codec) { |
| case cdm::AudioDecoderConfig::kCodecVorbis: |
| return AV_CODEC_ID_VORBIS; |
| case cdm::AudioDecoderConfig::kCodecAac: |
| return AV_CODEC_ID_AAC; |
| case cdm::AudioDecoderConfig::kUnknownAudioCodec: |
| default: |
| NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; |
| return AV_CODEC_ID_NONE; |
| } |
| } |
| |
| static void CdmAudioDecoderConfigToAVCodecContext( |
| const cdm::AudioDecoderConfig& config, |
| AVCodecContext* codec_context) { |
| codec_context->codec_type = AVMEDIA_TYPE_AUDIO; |
| codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); |
| |
| switch (config.bits_per_channel) { |
| case 8: |
| codec_context->sample_fmt = AV_SAMPLE_FMT_U8; |
| break; |
| case 16: |
| codec_context->sample_fmt = AV_SAMPLE_FMT_S16; |
| break; |
| case 32: |
| codec_context->sample_fmt = AV_SAMPLE_FMT_S32; |
| break; |
| default: |
| DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " |
| "per channel: " << config.bits_per_channel; |
| codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; |
| } |
| |
| codec_context->channels = config.channel_count; |
| codec_context->sample_rate = config.samples_per_second; |
| |
| if (config.extra_data) { |
| codec_context->extradata_size = config.extra_data_size; |
| codec_context->extradata = reinterpret_cast<uint8_t*>( |
| av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); |
| memcpy(codec_context->extradata, config.extra_data, |
| config.extra_data_size); |
| memset(codec_context->extradata + config.extra_data_size, '\0', |
| FF_INPUT_BUFFER_PADDING_SIZE); |
| } else { |
| codec_context->extradata = NULL; |
| codec_context->extradata_size = 0; |
| } |
| } |
| |
| FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host) |
| : is_initialized_(false), |
| host_(host), |
| bits_per_channel_(0), |
| samples_per_second_(0), |
| channels_(0), |
| av_sample_format_(0), |
| bytes_per_frame_(0), |
| last_input_timestamp_(kNoTimestamp()), |
| output_bytes_to_drop_(0) { |
| } |
| |
| FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() { |
| ReleaseFFmpegResources(); |
| } |
| |
| bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) { |
| DVLOG(1) << "Initialize()"; |
| |
| if (!IsValidConfig(config)) { |
| LOG(ERROR) << "Initialize(): invalid audio decoder configuration."; |
| return false; |
| } |
| |
| if (is_initialized_) { |
| LOG(ERROR) << "Initialize(): Already initialized."; |
| return false; |
| } |
| |
| // Initialize AVCodecContext structure. |
| codec_context_.reset(avcodec_alloc_context3(NULL)); |
| CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get()); |
| |
| // MP3 decodes to S16P which we don't support, tell it to use S16 instead. |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) |
| codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; |
| |
| AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); |
| if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) { |
| DLOG(ERROR) << "Could not initialize audio decoder: " |
| << codec_context_->codec_id; |
| return false; |
| } |
| |
| // Ensure avcodec_open2() respected our format request. |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { |
| DLOG(ERROR) << "Unable to configure a supported sample format: " |
| << codec_context_->sample_fmt; |
| return false; |
| } |
| |
| // Some codecs will only output float data, so we need to convert to integer |
| // before returning the decoded buffer. |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP || |
| codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { |
| // Preallocate the AudioBus for float conversions. We can treat interleaved |
| // float data as a single planar channel since our output is expected in an |
| // interleaved format anyways. |
| int channels = codec_context_->channels; |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) |
| channels = 1; |
| converter_bus_ = AudioBus::CreateWrapper(channels); |
| } |
| |
| // Success! |
| av_frame_.reset(avcodec_alloc_frame()); |
| bits_per_channel_ = config.bits_per_channel; |
| samples_per_second_ = config.samples_per_second; |
| bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8; |
| output_timestamp_helper_.reset( |
| new AudioTimestampHelper(config.samples_per_second)); |
| serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_); |
| is_initialized_ = true; |
| |
| // Store initial values to guard against midstream configuration changes. |
| channels_ = codec_context_->channels; |
| av_sample_format_ = codec_context_->sample_fmt; |
| |
| return true; |
| } |
| |
| void FFmpegCdmAudioDecoder::Deinitialize() { |
| DVLOG(1) << "Deinitialize()"; |
| ReleaseFFmpegResources(); |
| is_initialized_ = false; |
| ResetTimestampState(); |
| } |
| |
| void FFmpegCdmAudioDecoder::Reset() { |
| DVLOG(1) << "Reset()"; |
| avcodec_flush_buffers(codec_context_.get()); |
| ResetTimestampState(); |
| } |
| |
| // static |
| bool FFmpegCdmAudioDecoder::IsValidConfig( |
| const cdm::AudioDecoderConfig& config) { |
| return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && |
| config.channel_count > 0 && |
| config.channel_count <= kMaxChannels && |
| config.bits_per_channel > 0 && |
| config.bits_per_channel <= limits::kMaxBitsPerSample && |
| config.samples_per_second > 0 && |
| config.samples_per_second <= limits::kMaxSampleRate; |
| } |
| |
| cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( |
| const uint8_t* compressed_buffer, |
| int32_t compressed_buffer_size, |
| int64_t input_timestamp, |
| cdm::AudioFrames* decoded_frames) { |
| DVLOG(1) << "DecodeBuffer()"; |
| const bool is_end_of_stream = !compressed_buffer; |
| base::TimeDelta timestamp = |
| base::TimeDelta::FromMicroseconds(input_timestamp); |
| |
| bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; |
| if (!is_end_of_stream) { |
| if (last_input_timestamp_ == kNoTimestamp()) { |
| if (is_vorbis && timestamp < base::TimeDelta()) { |
| // Dropping frames for negative timestamps as outlined in section A.2 |
| // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html |
| int frames_to_drop = floor( |
| 0.5 + -timestamp.InSecondsF() * samples_per_second_); |
| output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; |
| } else { |
| last_input_timestamp_ = timestamp; |
| } |
| } else if (timestamp != kNoTimestamp()) { |
| if (timestamp < last_input_timestamp_) { |
| base::TimeDelta diff = timestamp - last_input_timestamp_; |
| DVLOG(1) << "Input timestamps are not monotonically increasing! " |
| << " ts " << timestamp.InMicroseconds() << " us" |
| << " diff " << diff.InMicroseconds() << " us"; |
| return cdm::kDecodeError; |
| } |
| |
| last_input_timestamp_ = timestamp; |
| } |
| } |
| |
| AVPacket packet; |
| av_init_packet(&packet); |
| packet.data = const_cast<uint8_t*>(compressed_buffer); |
| packet.size = compressed_buffer_size; |
| |
| // Each audio packet may contain several frames, so we must call the decoder |
| // until we've exhausted the packet. Regardless of the packet size we always |
| // want to hand it to the decoder at least once, otherwise we would end up |
| // skipping end of stream packets since they have a size of zero. |
| do { |
| // Reset frame to default values. |
| avcodec_get_frame_defaults(av_frame_.get()); |
| |
| int frame_decoded = 0; |
| int result = avcodec_decode_audio4( |
| codec_context_.get(), av_frame_.get(), &frame_decoded, &packet); |
| |
| if (result < 0) { |
| DCHECK(!is_end_of_stream) |
| << "End of stream buffer produced an error! " |
| << "This is quite possibly a bug in the audio decoder not handling " |
| << "end of stream AVPackets correctly."; |
| |
| DLOG(ERROR) |
| << "Error decoding an audio frame with timestamp: " |
| << timestamp.InMicroseconds() << " us, duration: " |
| << timestamp.InMicroseconds() << " us, packet size: " |
| << compressed_buffer_size << " bytes"; |
| |
| return cdm::kDecodeError; |
| } |
| |
| // Update packet size and data pointer in case we need to call the decoder |
| // with the remaining bytes from this packet. |
| packet.size -= result; |
| packet.data += result; |
| |
| if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() && |
| !is_end_of_stream) { |
| DCHECK(timestamp != kNoTimestamp()); |
| if (output_bytes_to_drop_ > 0) { |
| // Currently Vorbis is the only codec that causes us to drop samples. |
| // If we have to drop samples it always means the timeline starts at 0. |
| DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); |
| output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); |
| } else { |
| output_timestamp_helper_->SetBaseTimestamp(timestamp); |
| } |
| } |
| |
| int decoded_audio_size = 0; |
| if (frame_decoded) { |
| if (av_frame_->sample_rate != samples_per_second_ || |
| av_frame_->channels != channels_ || |
| av_frame_->format != av_sample_format_) { |
| DLOG(ERROR) << "Unsupported midstream configuration change!" |
| << " Sample Rate: " << av_frame_->sample_rate << " vs " |
| << samples_per_second_ |
| << ", Channels: " << av_frame_->channels << " vs " |
| << channels_ |
| << ", Sample Format: " << av_frame_->format << " vs " |
| << av_sample_format_; |
| return cdm::kDecodeError; |
| } |
| |
| decoded_audio_size = av_samples_get_buffer_size( |
| NULL, codec_context_->channels, av_frame_->nb_samples, |
| codec_context_->sample_fmt, 1); |
| // If we're decoding into float, adjust audio size. |
| if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) { |
| DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT || |
| codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP); |
| decoded_audio_size *= |
| static_cast<float>(bits_per_channel_ / 8) / sizeof(float); |
| } |
| } |
| |
| int start_sample = 0; |
| if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { |
| DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) |
| << "Decoder didn't output full frames"; |
| |
| int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); |
| start_sample = dropped_size / bytes_per_frame_; |
| decoded_audio_size -= dropped_size; |
| output_bytes_to_drop_ -= dropped_size; |
| } |
| |
| scoped_refptr<DataBuffer> output; |
| if (decoded_audio_size > 0) { |
| DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) |
| << "Decoder didn't output full frames"; |
| |
| // Convert float data using an AudioBus. |
| if (converter_bus_) { |
| // Setup the AudioBus as a wrapper of the AVFrame data and then use |
| // AudioBus::ToInterleaved() to convert the data as necessary. |
| int skip_frames = start_sample; |
| int total_frames = av_frame_->nb_samples; |
| int frames_to_interleave = decoded_audio_size / bytes_per_frame_; |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { |
| DCHECK_EQ(converter_bus_->channels(), 1); |
| total_frames *= codec_context_->channels; |
| skip_frames *= codec_context_->channels; |
| frames_to_interleave *= codec_context_->channels; |
| } |
| |
| converter_bus_->set_frames(total_frames); |
| for (int i = 0; i < converter_bus_->channels(); ++i) { |
| converter_bus_->SetChannelData(i, reinterpret_cast<float*>( |
| av_frame_->extended_data[i])); |
| } |
| |
| output = new DataBuffer(decoded_audio_size); |
| output->set_data_size(decoded_audio_size); |
| |
| DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames); |
| converter_bus_->ToInterleavedPartial( |
| skip_frames, frames_to_interleave, bits_per_channel_ / 8, |
| output->writable_data()); |
| } else { |
| output = DataBuffer::CopyFrom( |
| av_frame_->extended_data[0] + start_sample * bytes_per_frame_, |
| decoded_audio_size); |
| } |
| |
| base::TimeDelta output_timestamp = |
| output_timestamp_helper_->GetTimestamp(); |
| output_timestamp_helper_->AddFrames(decoded_audio_size / |
| bytes_per_frame_); |
| |
| // Serialize the audio samples into |serialized_audio_frames_|. |
| SerializeInt64(output_timestamp.InMicroseconds()); |
| SerializeInt64(output->data_size()); |
| serialized_audio_frames_.insert( |
| serialized_audio_frames_.end(), |
| output->data(), |
| output->data() + output->data_size()); |
| } |
| } while (packet.size > 0); |
| |
| if (!serialized_audio_frames_.empty()) { |
| decoded_frames->SetFrameBuffer( |
| host_->Allocate(serialized_audio_frames_.size())); |
| if (!decoded_frames->FrameBuffer()) { |
| LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed."; |
| return cdm::kDecodeError; |
| } |
| memcpy(decoded_frames->FrameBuffer()->Data(), |
| &serialized_audio_frames_[0], |
| serialized_audio_frames_.size()); |
| decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); |
| serialized_audio_frames_.clear(); |
| |
| return cdm::kSuccess; |
| } |
| |
| return cdm::kNeedMoreData; |
| } |
| |
| void FFmpegCdmAudioDecoder::ResetTimestampState() { |
| output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp()); |
| last_input_timestamp_ = kNoTimestamp(); |
| output_bytes_to_drop_ = 0; |
| } |
| |
| void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() { |
| DVLOG(1) << "ReleaseFFmpegResources()"; |
| |
| codec_context_.reset(); |
| av_frame_.reset(); |
| } |
| |
| void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) { |
| int previous_size = serialized_audio_frames_.size(); |
| serialized_audio_frames_.resize(previous_size + sizeof(value)); |
| memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); |
| } |
| |
| } // namespace media |