blob: db8e2015c1dd625d5edeb09205863f7f0a1f9545 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/speech/audio_encoder.h"
#include "base/basictypes.h"
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "content/browser/speech/audio_buffer.h"
#include "third_party/flac/include/FLAC/stream_encoder.h"
#include "third_party/speex/include/speex/speex.h"
namespace content {
namespace {
//-------------------------------- FLACEncoder ---------------------------------

// MIME type prefix for FLAC output; the sampling rate is appended to it when
// the encoder is constructed.
const char kContentTypeFLAC[] = "audio/x-flac; rate=";

// Compression level handed to the FLAC encoder. 0 favours speed.
const int kFLACCompressionLevel = 0;
// AudioEncoder backed by a FLAC stream encoder. Encoded bytes are delivered
// through WriteCallback() and appended to |encoded_audio_buffer_|.
class FLACEncoder : public AudioEncoder {
 public:
  FLACEncoder(int sampling_rate, int bits_per_sample);
  virtual ~FLACEncoder();

  // AudioEncoder overrides.
  virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
  virtual void Flush() OVERRIDE;

 private:
  // Write callback registered with the FLAC stream encoder. |client_data| is
  // the FLACEncoder instance that registered the callback.
  static FLAC__StreamEncoderWriteStatus WriteCallback(
      const FLAC__StreamEncoder* encoder,
      const FLAC__byte buffer[],
      size_t bytes,
      unsigned samples,
      unsigned current_frame,
      void* client_data);

  // Created in the constructor and released in the destructor.
  FLAC__StreamEncoder* encoder_;

  // Becomes true once Encode() has initialized the encoder's output stream.
  bool is_encoder_initialized_;

  DISALLOW_COPY_AND_ASSIGN(FLACEncoder);
};
// Receives a run of encoded bytes from the FLAC encoder and queues it on the
// owning FLACEncoder's output buffer. |samples| and |current_frame| are not
// used. Always reports success to the encoder.
FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback(
    const FLAC__StreamEncoder* encoder,
    const FLAC__byte buffer[],
    size_t bytes,
    unsigned samples,
    unsigned current_frame,
    void* client_data) {
  // |client_data| is the |this| pointer supplied when the stream was
  // initialized in Encode().
  FLACEncoder* const me = static_cast<FLACEncoder*>(client_data);
  DCHECK(me->encoder_ == encoder);

  me->encoded_audio_buffer_.Enqueue(buffer, bytes);
  return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
}
// Creates a single-channel FLAC encoder configured for |sampling_rate| and
// |bits_per_sample|. The MIME type reported by the base class is
// kContentTypeFLAC followed by the decimal sampling rate.
FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample)
    : AudioEncoder(std::string(kContentTypeFLAC) +
                       base::IntToString(sampling_rate),
                   bits_per_sample),
      encoder_(FLAC__stream_encoder_new()),
      is_encoder_initialized_(false) {
  // FLAC__stream_encoder_new() returns NULL when allocation fails. Catch that
  // here, mirroring the check SpeexEncoder performs on its encoder state,
  // rather than failing inside the setters below.
  DCHECK(encoder_);

  FLAC__stream_encoder_set_channels(encoder_, 1);
  FLAC__stream_encoder_set_bits_per_sample(encoder_, bits_per_sample);
  FLAC__stream_encoder_set_sample_rate(encoder_, sampling_rate);
  FLAC__stream_encoder_set_compression_level(encoder_, kFLACCompressionLevel);

  // Initializing the encoder will cause sync bytes to be written to its
  // output stream, so initialization is deferred until the first call to
  // Encode().
}
// Releases the FLAC encoder instance created in the constructor.
FLACEncoder::~FLACEncoder() {
  FLAC__stream_encoder_delete(encoder_);
}
void FLACEncoder::Encode(const AudioChunk& raw_audio) {
DCHECK_EQ(raw_audio.bytes_per_sample(), 2);
if (!is_encoder_initialized_) {
const FLAC__StreamEncoderInitStatus encoder_status =
FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL,
NULL, this);
DCHECK_EQ(encoder_status, FLAC__STREAM_ENCODER_INIT_STATUS_OK);
is_encoder_initialized_ = true;
}
// FLAC encoder wants samples as int32s.
const int num_samples = raw_audio.NumSamples();
scoped_ptr<FLAC__int32[]> flac_samples(new FLAC__int32[num_samples]);
FLAC__int32* flac_samples_ptr = flac_samples.get();
for (int i = 0; i < num_samples; ++i)
flac_samples_ptr[i] = static_cast<FLAC__int32>(raw_audio.GetSample16(i));
FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples);
}
// Finishes the FLAC encoding process on |encoder_|.
void FLACEncoder::Flush() {
  FLAC__stream_encoder_finish(encoder_);
}
//-------------------------------- SpeexEncoder --------------------------------

// MIME type prefix for Speex output; the sampling rate is appended to it when
// the encoder is constructed.
const char kContentTypeSpeex[] = "audio/x-speex-with-header-byte; rate=";

// Quality value passed to the Speex encoder via SPEEX_SET_QUALITY.
const int kSpeexEncodingQuality = 8;

// Upper bound, in bytes, on a single encoded Speex frame
// (44kbps rate sampled at 32kHz).
const int kMaxSpeexFrameLength = 110;
// SpeexEncoder::Encode() writes the frame length as a single char ahead of
// each encoded frame, so the maximum frame length must fit within the byte
// range. Fail the build if it ever does not.
COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
// AudioEncoder backed by a Speex encoder. Each encoded frame is emitted with
// a one-byte length prefix (the x-speex-with-header-byte packet format).
class SpeexEncoder : public AudioEncoder {
 public:
  explicit SpeexEncoder(int sampling_rate, int bits_per_sample);
  virtual ~SpeexEncoder();

  // AudioEncoder overrides. Flush() is a no-op for this encoder.
  virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
  virtual void Flush() OVERRIDE {}

 private:
  // Opaque Speex encoder state; created in the constructor and destroyed in
  // the destructor.
  void* encoder_state_;

  // Bit-packing state, reset before each frame is encoded.
  SpeexBits bits_;

  // Number of input samples per frame, queried from the encoder.
  int samples_per_frame_;

  // Scratch space for a single output packet.
  char encoded_frame_data_[kMaxSpeexFrameLength + 1];  // +1 for the frame size.

  DISALLOW_COPY_AND_ASSIGN(SpeexEncoder);
};
// Sets up a Speex encoder in |speex_wb_mode| with variable bit rate enabled
// and quality kSpeexEncodingQuality. The MIME type reported by the base class
// is kContentTypeSpeex followed by the decimal sampling rate.
SpeexEncoder::SpeexEncoder(int sampling_rate, int bits_per_sample)
    : AudioEncoder(std::string(kContentTypeSpeex) +
                       base::IntToString(sampling_rate),
                   bits_per_sample) {
  // Start with a zeroed packet scratch buffer.
  memset(encoded_frame_data_, 0, sizeof(encoded_frame_data_));

  // speex_bits_init() does not initialize all of the |bits_| struct, so clear
  // it first.
  memset(&bits_, 0, sizeof(bits_));
  speex_bits_init(&bits_);

  encoder_state_ = speex_encoder_init(&speex_wb_mode);
  DCHECK(encoder_state_);

  // Ask the encoder how many samples make up one frame.
  speex_encoder_ctl(encoder_state_, SPEEX_GET_FRAME_SIZE, &samples_per_frame_);
  DCHECK(samples_per_frame_ > 0);

  int encoding_quality = kSpeexEncodingQuality;
  speex_encoder_ctl(encoder_state_, SPEEX_SET_QUALITY, &encoding_quality);

  int enable_vbr = 1;
  speex_encoder_ctl(encoder_state_, SPEEX_SET_VBR, &enable_vbr);
}
// Tears down the bit-packing state and the encoder state set up by the
// constructor.
SpeexEncoder::~SpeexEncoder() {
  speex_bits_destroy(&bits_);
  speex_encoder_destroy(encoder_state_);
}
void SpeexEncoder::Encode(const AudioChunk& raw_audio) {
spx_int16_t* src_buffer =
const_cast<spx_int16_t*>(raw_audio.SamplesData16());
int num_samples = raw_audio.NumSamples();
// Drop incomplete frames, typically those which come in when recording stops.
num_samples -= (num_samples % samples_per_frame_);
for (int i = 0; i < num_samples; i += samples_per_frame_) {
speex_bits_reset(&bits_);
speex_encode_int(encoder_state_, src_buffer + i, &bits_);
// Encode the frame and place the size of the frame as the first byte. This
// is the packet format for MIME type x-speex-with-header-byte.
int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1,
kMaxSpeexFrameLength);
encoded_frame_data_[0] = static_cast<char>(frame_length);
encoded_audio_buffer_.Enqueue(
reinterpret_cast<uint8*>(&encoded_frame_data_[0]), frame_length + 1);
}
}
} // namespace
// Factory for encoders. Returns a newly allocated FLACEncoder when |codec| is
// CODEC_FLAC and a newly allocated SpeexEncoder for any other value.
AudioEncoder* AudioEncoder::Create(Codec codec,
                                   int sampling_rate,
                                   int bits_per_sample) {
  if (codec != CODEC_FLAC)
    return new SpeexEncoder(sampling_rate, bits_per_sample);

  return new FLACEncoder(sampling_rate, bits_per_sample);
}
// Stores the MIME type and sample width, and sets up the output buffer that
// subclasses enqueue encoded bytes into.
AudioEncoder::AudioEncoder(const std::string& mime_type, int bits_per_sample)
    : encoded_audio_buffer_(1),  // Byte granularity of encoded samples.
      mime_type_(mime_type),
      bits_per_sample_(bits_per_sample) {
}
// Nothing to release explicitly at this level.
AudioEncoder::~AudioEncoder() {
}
// Returns the result of dequeuing everything currently held in
// |encoded_audio_buffer_|.
scoped_refptr<AudioChunk> AudioEncoder::GetEncodedDataAndClear() {
  scoped_refptr<AudioChunk> encoded_data = encoded_audio_buffer_.DequeueAll();
  return encoded_data;
}
} // namespace content