blob: 99470601412e3aa6d9311d2098b55e388a137528 [file] [log] [blame]
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
namespace {
enum {
kSamplesPer8kHzChannel = 80,
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320
};
bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kStereo:
return false;
case AudioProcessing::kMonoAndKeyboard:
case AudioProcessing::kStereoAndKeyboard:
return true;
}
assert(false);
return false;
}
int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kStereo:
assert(false);
return -1;
case AudioProcessing::kMonoAndKeyboard:
return 1;
case AudioProcessing::kStereoAndKeyboard:
return 2;
}
assert(false);
return -1;
}
template <typename T>
void StereoToMono(const T* left, const T* right, T* out,
int samples_per_channel) {
for (int i = 0; i < samples_per_channel; ++i)
out[i] = (left[i] + right[i]) / 2;
}
} // namespace
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
class IFChannelBuffer {
public:
IFChannelBuffer(int samples_per_channel, int num_channels)
: ivalid_(true),
ibuf_(samples_per_channel, num_channels),
fvalid_(true),
fbuf_(samples_per_channel, num_channels) {}
ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
ChannelBuffer<float>* fbuf() { return fbuf(false); }
const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }
private:
ChannelBuffer<int16_t>* ibuf(bool readonly) {
RefreshI();
fvalid_ = readonly;
return &ibuf_;
}
ChannelBuffer<float>* fbuf(bool readonly) {
RefreshF();
ivalid_ = readonly;
return &fbuf_;
}
void RefreshF() {
if (!fvalid_) {
assert(ivalid_);
const int16_t* const int_data = ibuf_.data();
float* const float_data = fbuf_.data();
const int length = fbuf_.length();
for (int i = 0; i < length; ++i)
float_data[i] = int_data[i];
fvalid_ = true;
}
}
void RefreshI() {
if (!ivalid_) {
assert(fvalid_);
FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
ivalid_ = true;
}
}
bool ivalid_;
ChannelBuffer<int16_t> ibuf_;
bool fvalid_;
ChannelBuffer<float> fbuf_;
};
AudioBuffer::AudioBuffer(int input_samples_per_channel,
int num_input_channels,
int process_samples_per_channel,
int num_process_channels,
int output_samples_per_channel)
: input_samples_per_channel_(input_samples_per_channel),
num_input_channels_(num_input_channels),
proc_samples_per_channel_(process_samples_per_channel),
num_proc_channels_(num_process_channels),
output_samples_per_channel_(output_samples_per_channel),
samples_per_split_channel_(proc_samples_per_channel_),
mixed_low_pass_valid_(false),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
keyboard_data_(NULL),
channels_(new IFChannelBuffer(proc_samples_per_channel_,
num_proc_channels_)) {
assert(input_samples_per_channel_ > 0);
assert(proc_samples_per_channel_ > 0);
assert(output_samples_per_channel_ > 0);
assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
assert(num_proc_channels_ <= num_input_channels);
if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
num_proc_channels_));
}
if (input_samples_per_channel_ != proc_samples_per_channel_ ||
output_samples_per_channel_ != proc_samples_per_channel_) {
// Create an intermediate buffer for resampling.
process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
num_proc_channels_));
}
if (input_samples_per_channel_ != proc_samples_per_channel_) {
input_resamplers_.reserve(num_proc_channels_);
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_.push_back(
new PushSincResampler(input_samples_per_channel_,
proc_samples_per_channel_));
}
}
if (output_samples_per_channel_ != proc_samples_per_channel_) {
output_resamplers_.reserve(num_proc_channels_);
for (int i = 0; i < num_proc_channels_; ++i) {
output_resamplers_.push_back(
new PushSincResampler(proc_samples_per_channel_,
output_samples_per_channel_));
}
}
if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
samples_per_split_channel_ = kSamplesPer16kHzChannel;
split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
}
}
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::CopyFrom(const float* const* data,
int samples_per_channel,
AudioProcessing::ChannelLayout layout) {
assert(samples_per_channel == input_samples_per_channel_);
assert(ChannelsFromLayout(layout) == num_input_channels_);
InitForNewData();
if (HasKeyboardChannel(layout)) {
keyboard_data_ = data[KeyboardChannelIndex(layout)];
}
// Downmix.
const float* const* data_ptr = data;
if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
StereoToMono(data[0],
data[1],
input_buffer_->channel(0),
input_samples_per_channel_);
data_ptr = input_buffer_->channels();
}
// Resample.
if (input_samples_per_channel_ != proc_samples_per_channel_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(data_ptr[i],
input_samples_per_channel_,
process_buffer_->channel(i),
proc_samples_per_channel_);
}
data_ptr = process_buffer_->channels();
}
// Convert to int16.
for (int i = 0; i < num_proc_channels_; ++i) {
FloatToFloatS16(data_ptr[i], proc_samples_per_channel_,
channels_->fbuf()->channel(i));
}
}
void AudioBuffer::CopyTo(int samples_per_channel,
AudioProcessing::ChannelLayout layout,
float* const* data) {
assert(samples_per_channel == output_samples_per_channel_);
assert(ChannelsFromLayout(layout) == num_proc_channels_);
// Convert to float.
float* const* data_ptr = data;
if (output_samples_per_channel_ != proc_samples_per_channel_) {
// Convert to an intermediate buffer for subsequent resampling.
data_ptr = process_buffer_->channels();
}
for (int i = 0; i < num_proc_channels_; ++i) {
FloatS16ToFloat(channels_->fbuf()->channel(i),
proc_samples_per_channel_,
data_ptr[i]);
}
// Resample.
if (output_samples_per_channel_ != proc_samples_per_channel_) {
for (int i = 0; i < num_proc_channels_; ++i) {
output_resamplers_[i]->Resample(data_ptr[i],
proc_samples_per_channel_,
data[i],
output_samples_per_channel_);
}
}
}
void AudioBuffer::InitForNewData() {
keyboard_data_ = NULL;
mixed_low_pass_valid_ = false;
reference_copied_ = false;
activity_ = AudioFrame::kVadUnknown;
}
const int16_t* AudioBuffer::data(int channel) const {
return channels_->ibuf_const()->channel(channel);
}
int16_t* AudioBuffer::data(int channel) {
mixed_low_pass_valid_ = false;
return channels_->ibuf()->channel(channel);
}
const float* AudioBuffer::data_f(int channel) const {
return channels_->fbuf_const()->channel(channel);
}
float* AudioBuffer::data_f(int channel) {
mixed_low_pass_valid_ = false;
return channels_->fbuf()->channel(channel);
}
const float* const* AudioBuffer::channels_f() const {
return channels_->fbuf_const()->channels();
}
float* const* AudioBuffer::channels_f() {
mixed_low_pass_valid_ = false;
return channels_->fbuf()->channels();
}
const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
return split_channels_low_.get()
? split_channels_low_->ibuf_const()->channel(channel)
: data(channel);
}
int16_t* AudioBuffer::low_pass_split_data(int channel) {
mixed_low_pass_valid_ = false;
return split_channels_low_.get()
? split_channels_low_->ibuf()->channel(channel)
: data(channel);
}
const float* AudioBuffer::low_pass_split_data_f(int channel) const {
return split_channels_low_.get()
? split_channels_low_->fbuf_const()->channel(channel)
: data_f(channel);
}
float* AudioBuffer::low_pass_split_data_f(int channel) {
mixed_low_pass_valid_ = false;
return split_channels_low_.get()
? split_channels_low_->fbuf()->channel(channel)
: data_f(channel);
}
const float* const* AudioBuffer::low_pass_split_channels_f() const {
return split_channels_low_.get()
? split_channels_low_->fbuf_const()->channels()
: channels_f();
}
float* const* AudioBuffer::low_pass_split_channels_f() {
mixed_low_pass_valid_ = false;
return split_channels_low_.get()
? split_channels_low_->fbuf()->channels()
: channels_f();
}
const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
return split_channels_high_.get()
? split_channels_high_->ibuf_const()->channel(channel)
: NULL;
}
int16_t* AudioBuffer::high_pass_split_data(int channel) {
return split_channels_high_.get()
? split_channels_high_->ibuf()->channel(channel)
: NULL;
}
const float* AudioBuffer::high_pass_split_data_f(int channel) const {
return split_channels_high_.get()
? split_channels_high_->fbuf_const()->channel(channel)
: NULL;
}
float* AudioBuffer::high_pass_split_data_f(int channel) {
return split_channels_high_.get()
? split_channels_high_->fbuf()->channel(channel)
: NULL;
}
const float* const* AudioBuffer::high_pass_split_channels_f() const {
return split_channels_high_.get()
? split_channels_high_->fbuf_const()->channels()
: NULL;
}
float* const* AudioBuffer::high_pass_split_channels_f() {
return split_channels_high_.get()
? split_channels_high_->fbuf()->channels()
: NULL;
}
const int16_t* AudioBuffer::mixed_low_pass_data() {
// Currently only mixing stereo to mono is supported.
assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
if (num_proc_channels_ == 1) {
return low_pass_split_data(0);
}
if (!mixed_low_pass_valid_) {
if (!mixed_low_pass_channels_.get()) {
mixed_low_pass_channels_.reset(
new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
}
StereoToMono(low_pass_split_data(0),
low_pass_split_data(1),
mixed_low_pass_channels_->data(),
samples_per_split_channel_);
mixed_low_pass_valid_ = true;
}
return mixed_low_pass_channels_->data();
}
const int16_t* AudioBuffer::low_pass_reference(int channel) const {
if (!reference_copied_) {
return NULL;
}
return low_pass_reference_channels_->channel(channel);
}
const float* AudioBuffer::keyboard_data() const {
return keyboard_data_;
}
SplitFilterStates* AudioBuffer::filter_states(int channel) {
assert(channel >= 0 && channel < num_proc_channels_);
return &filter_states_[channel];
}
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
activity_ = activity;
}
AudioFrame::VADActivity AudioBuffer::activity() const {
return activity_;
}
int AudioBuffer::num_channels() const {
return num_proc_channels_;
}
int AudioBuffer::samples_per_channel() const {
return proc_samples_per_channel_;
}
int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_;
}
int AudioBuffer::samples_per_keyboard_channel() const {
// We don't resample the keyboard channel.
return input_samples_per_channel_;
}
// TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(proc_samples_per_channel_ == input_samples_per_channel_);
assert(frame->num_channels_ == num_input_channels_);
assert(frame->samples_per_channel_ == proc_samples_per_channel_);
InitForNewData();
activity_ = frame->vad_activity_;
if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
// Downmix directly; no explicit deinterleaving needed.
int16_t* downmixed = channels_->ibuf()->channel(0);
for (int i = 0; i < input_samples_per_channel_; ++i) {
downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
}
} else {
assert(num_proc_channels_ == num_input_channels_);
int16_t* interleaved = frame->data_;
for (int i = 0; i < num_proc_channels_; ++i) {
int16_t* deinterleaved = channels_->ibuf()->channel(i);
int interleaved_idx = i;
for (int j = 0; j < proc_samples_per_channel_; ++j) {
deinterleaved[j] = interleaved[interleaved_idx];
interleaved_idx += num_proc_channels_;
}
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
assert(proc_samples_per_channel_ == output_samples_per_channel_);
assert(num_proc_channels_ == num_input_channels_);
assert(frame->num_channels_ == num_proc_channels_);
assert(frame->samples_per_channel_ == proc_samples_per_channel_);
frame->vad_activity_ = activity_;
if (!data_changed) {
return;
}
int16_t* interleaved = frame->data_;
for (int i = 0; i < num_proc_channels_; i++) {
int16_t* deinterleaved = channels_->ibuf()->channel(i);
int interleaved_idx = i;
for (int j = 0; j < proc_samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
interleaved_idx += num_proc_channels_;
}
}
}
void AudioBuffer::CopyLowPassToReference() {
reference_copied_ = true;
if (!low_pass_reference_channels_.get()) {
low_pass_reference_channels_.reset(
new ChannelBuffer<int16_t>(samples_per_split_channel_,
num_proc_channels_));
}
for (int i = 0; i < num_proc_channels_; i++) {
low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
}
}
} // namespace webrtc