blob: 2425f094ce0f557df5328628c990c053062eb9a5 [file] [log] [blame]
/* Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include <stdint.h>
#include "cras_system_state.h"
#include "cras_mix_ops.h"
/* Scalers above this value are treated as unity in this file: samples are
 * copied, mixed or left in place without being multiplied. */
#define MAX_VOLUME_TO_SCALE 0.9999999
/* Scalers below this value are treated as silence in this file: the
 * affected samples are zeroed or skipped instead of being multiplied. */
#define MIN_VOLUME_TO_SCALE 0.0000001
/* Function suffixes for SIMD ops.
 * OPS(a) appends a suffix to the identifier a according to which OPS_*
 * macro is set for this compilation; with none set the name is unchanged.
 * Only OPS_SSE42 is tested with #ifdef; the other macros are evaluated as
 * preprocessor expressions and must therefore expand to a non-zero value
 * to be selected. */
#ifdef OPS_SSE42
#define OPS(a) a##_sse42
#elif OPS_AVX
#define OPS(a) a##_avx
#elif OPS_AVX2
#define OPS(a) a##_avx2
#elif OPS_FMA
#define OPS(a) a##_fma
#else
#define OPS(a) a
#endif
/* Reports whether a scaler is far enough from 1.0 to be worth applying.
 * Scalers inside [0.99, 1.01] are considered unity and yield 0; anything
 * outside that window yields 1.
 * This check does not involve MAX_VOLUME_TO_SCALE or MIN_VOLUME_TO_SCALE. */
static inline int need_to_scale(float scaler)
{
	if (scaler < 0.99)
		return 1;
	if (scaler > 1.01)
		return 1;
	return 0;
}
/*
* Signed 16 bit little endian functions.
*/
/* Adds count S16 samples of src into dst, saturating each sum to the
 * [INT16_MIN, INT16_MAX] range. */
static void cras_mix_add_clip_s16_le(int16_t *dst, const int16_t *src,
				     size_t count)
{
	const int16_t *end = src + count;

	while (src != end) {
		int32_t mixed = *dst + *src++;

		if (mixed < INT16_MIN)
			mixed = INT16_MIN;
		else if (mixed > INT16_MAX)
			mixed = INT16_MAX;
		*dst++ = mixed;
	}
}
/* Mixes src into dst after multiplying each source sample by vol.
 * The sum is hard limited to the S16 range; no softer limiting is done.
 * A vol above MAX_VOLUME_TO_SCALE is treated as unity and the samples are
 * mixed without scaling. */
static void scale_add_clip_s16_le(int16_t *dst, const int16_t *src,
				  size_t count, float vol)
{
	size_t n;

	if (vol > MAX_VOLUME_TO_SCALE) {
		cras_mix_add_clip_s16_le(dst, src, count);
		return;
	}

	for (n = 0; n < count; n++) {
		int32_t mixed = dst[n] + (int16_t)(src[n] * vol);

		if (mixed < INT16_MIN)
			mixed = INT16_MIN;
		else if (mixed > INT16_MAX)
			mixed = INT16_MAX;
		dst[n] = mixed;
	}
}
/* Writes the first stream of a mix: dst is overwritten with the scaled
 * source instead of being accumulated into.
 * Args:
 *    dst - destination S16 samples.
 *    src - source S16 samples.
 *    count - number of samples to write.
 *    volume_scaler - multiplier applied to every sample. Above
 *        MAX_VOLUME_TO_SCALE the samples are copied with memcpy.
 */
static void copy_scaled_s16_le(int16_t *dst, const int16_t *src, size_t count,
			       float volume_scaler)
{
	/* Same type as count, so a large buffer cannot overflow the index. */
	size_t i;

	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
		memcpy(dst, src, count * sizeof(*src));
		return;
	}

	for (i = 0; i < count; i++)
		dst[i] = src[i] * volume_scaler;
}
/* Scales S16 samples in place with a scaler that ramps towards a target.
 * Samples are processed in groups of step; all samples of a group share one
 * scaler and the scaler advances by increment after each group. The applied
 * scaler never goes past target in the direction of increment. Samples
 * after the last complete group are left untouched.
 * Args:
 *    buffer - samples to scale in place.
 *    count - number of samples in buffer.
 *    scaler - scaler applied to the first group.
 *    increment - amount added to scaler after each group.
 *    target - limit for the applied scaler.
 *    step - number of samples per group. A non-positive step scales
 *        nothing.
 */
static void cras_scale_buffer_inc_s16_le(uint8_t *buffer, unsigned int count,
					 float scaler, float increment,
					 float target, int step)
{
	int16_t *out = (int16_t *)buffer;
	unsigned int group;
	unsigned int i = 0;
	unsigned int j;

	/* Already silent and ramping down: the whole buffer is silence. */
	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
		memset(out, 0, count * sizeof(*out));
		return;
	}

	/* A zero step would never advance through the buffer. */
	if (step <= 0)
		return;
	group = (unsigned int)step;

	/* i never exceeds count, so the subtraction cannot wrap. */
	while (count - i >= group) {
		float applied_scaler = scaler;

		if ((applied_scaler > target && increment > 0) ||
		    (applied_scaler < target && increment < 0))
			applied_scaler = target;

		for (j = 0; j < group; j++, i++) {
			/* Unity: the sample is kept as it is. */
			if (applied_scaler > MAX_VOLUME_TO_SCALE)
				continue;
			if (applied_scaler < MIN_VOLUME_TO_SCALE)
				out[i] = 0;
			else
				out[i] *= applied_scaler;
		}
		scaler += increment;
	}
}
/* Scales count S16 samples in place by a constant scaler.
 * Args:
 *    buffer - samples to scale in place.
 *    count - number of samples in buffer.
 *    scaler - multiplier. Above MAX_VOLUME_TO_SCALE the buffer is left
 *        untouched; below MIN_VOLUME_TO_SCALE it is zeroed.
 */
static void cras_scale_buffer_s16_le(uint8_t *buffer, unsigned int count,
				     float scaler)
{
	int16_t *out = (int16_t *)buffer;
	/* Unsigned like count, so the comparison below is never mixed-sign. */
	unsigned int i;

	if (scaler > MAX_VOLUME_TO_SCALE)
		return;

	if (scaler < MIN_VOLUME_TO_SCALE) {
		memset(out, 0, count * sizeof(*out));
		return;
	}

	for (i = 0; i < count; i++)
		out[i] *= scaler;
}
/* Adds one stream of S16 samples to a mix buffer.
 * Args:
 *    dst - mix buffer.
 *    src - samples of the stream being added.
 *    count - number of samples.
 *    index - position of the stream in the mix; for index 0 the mix buffer
 *        is overwritten, otherwise the stream is summed into it.
 *    mute - non-zero if the stream is muted.
 *    mix_vol - scaler applied to the stream.
 * A muted stream, or one with mix_vol below MIN_VOLUME_TO_SCALE, adds
 * nothing; if it is the stream at index 0 the mix buffer is zeroed.
 */
static void cras_mix_add_s16_le(uint8_t *dst, uint8_t *src, unsigned int count,
				unsigned int index, int mute, float mix_vol)
{
	int16_t *mix = (int16_t *)dst;
	int16_t *stream = (int16_t *)src;
	const int first = (index == 0);

	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
		if (first)
			memset(mix, 0, count * sizeof(*mix));
		return;
	}

	if (first)
		copy_scaled_s16_le(mix, stream, count, mix_vol);
	else
		scale_add_clip_s16_le(mix, stream, count, mix_vol);
}
/* Mixes the S16 sample at src into the S16 sample at dst, saturating the
 * result. The source is multiplied by scaler only when scale is non-zero. */
static inline void mix_stride_sample_s16_le(uint8_t *dst, uint8_t *src,
					    int scale, float scaler)
{
	int32_t sum;

	if (scale)
		sum = *(int16_t *)dst + *(int16_t *)src * scaler;
	else
		sum = *(int16_t *)dst + *(int16_t *)src;

	if (sum < INT16_MIN)
		sum = INT16_MIN;
	else if (sum > INT16_MAX)
		sum = INT16_MAX;
	*(int16_t *)dst = sum;
}

/* Mixes count S16 samples from src into dst, where consecutive samples are
 * dst_stride and src_stride bytes apart.
 * Args:
 *    dst - first destination sample.
 *    src - first source sample.
 *    dst_stride - distance in bytes between destination samples.
 *    src_stride - distance in bytes between source samples.
 *    count - number of samples to mix.
 *    scaler - multiplier for the source; skipped when need_to_scale()
 *        reports it as close to 1.0.
 */
static void cras_mix_add_scale_stride_s16_le(uint8_t *dst, uint8_t *src,
					     unsigned int dst_stride,
					     unsigned int src_stride,
					     unsigned int count, float scaler)
{
	const int scale = need_to_scale(scaler);
	unsigned int n;

	/* Constant strides get their own loops to help vectorization. */
	if (dst_stride == 2 && src_stride == 2) {
		for (n = 0; n < count; n++, dst += 2, src += 2)
			mix_stride_sample_s16_le(dst, src, scale, scaler);
		return;
	}

	if (dst_stride == 4 && src_stride == 4) {
		for (n = 0; n < count; n++, dst += 4, src += 4)
			mix_stride_sample_s16_le(dst, src, scale, scaler);
		return;
	}

	for (n = 0; n < count; n++, dst += dst_stride, src += src_stride)
		mix_stride_sample_s16_le(dst, src, scale, scaler);
}
/*
* Signed 24 bit little endian functions.
*/
/* Scales one 24 bit sample held in the low three bytes of value.
 * The low 24 bits are moved to the top of a 32 bit word so that the sample
 * sign lines up with the word sign, multiplied by scaler and moved back.
 * The result is masked to 24 bits, so its top byte is always zero. */
static int32_t scale_s24_le(int32_t value, float scaler)
{
	const uint32_t low24 = (uint32_t)(value & 0x00ffffff);
	int32_t widened = low24 << 8;

	widened *= scaler;
	widened >>= 8;
	return widened & 0x00ffffff;
}
/* Adds count S24 samples of src into dst, saturating each sum to the signed
 * 24 bit range [0xff800000, 0x007fffff] held in a 32 bit word. */
static void cras_mix_add_clip_s24_le(int32_t *dst, const int32_t *src,
				     size_t count)
{
	const int32_t s24_max = 0x007fffff;
	const int32_t s24_min = (int32_t)0xff800000;
	const int32_t *end = src + count;

	while (src != end) {
		int32_t mixed = *dst + *src++;

		if (mixed < s24_min)
			mixed = s24_min;
		else if (mixed > s24_max)
			mixed = s24_max;
		*dst++ = mixed;
	}
}
/* Mixes src into dst after multiplying each source sample by vol.
 * The sum is hard limited to the signed 24 bit range; no softer limiting
 * is done. A vol above MAX_VOLUME_TO_SCALE is treated as unity and the
 * samples are mixed without scaling. */
static void scale_add_clip_s24_le(int32_t *dst, const int32_t *src,
				  size_t count, float vol)
{
	const int32_t s24_max = 0x007fffff;
	const int32_t s24_min = (int32_t)0xff800000;
	size_t n;

	if (vol > MAX_VOLUME_TO_SCALE) {
		cras_mix_add_clip_s24_le(dst, src, count);
		return;
	}

	for (n = 0; n < count; n++) {
		int32_t mixed = dst[n] + (int32_t)(src[n] * vol);

		if (mixed < s24_min)
			mixed = s24_min;
		else if (mixed > s24_max)
			mixed = s24_max;
		dst[n] = mixed;
	}
}
/* Writes the first stream of a mix: dst is overwritten with the scaled
 * source instead of being accumulated into.
 * Args:
 *    dst - destination S24 samples, one per 32 bit word.
 *    src - source S24 samples, one per 32 bit word.
 *    count - number of samples to write.
 *    volume_scaler - multiplier applied through scale_s24_le(). Above
 *        MAX_VOLUME_TO_SCALE the samples are copied with memcpy.
 */
static void copy_scaled_s24_le(int32_t *dst, const int32_t *src, size_t count,
			       float volume_scaler)
{
	/* Same type as count, so a large buffer cannot overflow the index. */
	size_t i;

	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
		memcpy(dst, src, count * sizeof(*src));
		return;
	}

	for (i = 0; i < count; i++)
		dst[i] = scale_s24_le(src[i], volume_scaler);
}
/* Scales S24 samples in place with a scaler that ramps towards a target.
 * Samples are processed in groups of step; all samples of a group share one
 * scaler and the scaler advances by increment after each group. The applied
 * scaler never goes past target in the direction of increment. Samples
 * after the last complete group are left untouched.
 * Args:
 *    buffer - samples to scale in place, one per 32 bit word.
 *    count - number of samples in buffer.
 *    scaler - scaler applied to the first group.
 *    increment - amount added to scaler after each group.
 *    target - limit for the applied scaler.
 *    step - number of samples per group. A non-positive step scales
 *        nothing.
 */
static void cras_scale_buffer_inc_s24_le(uint8_t *buffer, unsigned int count,
					 float scaler, float increment,
					 float target, int step)
{
	int32_t *out = (int32_t *)buffer;
	unsigned int group;
	unsigned int i = 0;
	unsigned int j;

	/* Already silent and ramping down: the whole buffer is silence. */
	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
		memset(out, 0, count * sizeof(*out));
		return;
	}

	/* A zero step would never advance through the buffer. */
	if (step <= 0)
		return;
	group = (unsigned int)step;

	/* i never exceeds count, so the subtraction cannot wrap. */
	while (count - i >= group) {
		float applied_scaler = scaler;

		if ((applied_scaler > target && increment > 0) ||
		    (applied_scaler < target && increment < 0))
			applied_scaler = target;

		for (j = 0; j < group; j++, i++) {
			/* Unity: the sample is kept as it is. */
			if (applied_scaler > MAX_VOLUME_TO_SCALE)
				continue;
			if (applied_scaler < MIN_VOLUME_TO_SCALE)
				out[i] = 0;
			else
				out[i] = scale_s24_le(out[i], applied_scaler);
		}
		scaler += increment;
	}
}
/* Scales count S24 samples in place by a constant scaler.
 * Args:
 *    buffer - samples to scale in place, one per 32 bit word.
 *    count - number of samples in buffer.
 *    scaler - multiplier applied through scale_s24_le(). Above
 *        MAX_VOLUME_TO_SCALE the buffer is left untouched; below
 *        MIN_VOLUME_TO_SCALE it is zeroed.
 */
static void cras_scale_buffer_s24_le(uint8_t *buffer, unsigned int count,
				     float scaler)
{
	int32_t *out = (int32_t *)buffer;
	/* Unsigned like count, so the comparison below is never mixed-sign. */
	unsigned int i;

	if (scaler > MAX_VOLUME_TO_SCALE)
		return;

	if (scaler < MIN_VOLUME_TO_SCALE) {
		memset(out, 0, count * sizeof(*out));
		return;
	}

	for (i = 0; i < count; i++)
		out[i] = scale_s24_le(out[i], scaler);
}
/* Adds one stream of S24 samples to a mix buffer.
 * Args:
 *    dst - mix buffer, one sample per 32 bit word.
 *    src - samples of the stream being added.
 *    count - number of samples.
 *    index - position of the stream in the mix; for index 0 the mix buffer
 *        is overwritten, otherwise the stream is summed into it.
 *    mute - non-zero if the stream is muted.
 *    mix_vol - scaler applied to the stream.
 * A muted stream, or one with mix_vol below MIN_VOLUME_TO_SCALE, adds
 * nothing; if it is the stream at index 0 the mix buffer is zeroed.
 */
static void cras_mix_add_s24_le(uint8_t *dst, uint8_t *src, unsigned int count,
				unsigned int index, int mute, float mix_vol)
{
	int32_t *mix = (int32_t *)dst;
	int32_t *stream = (int32_t *)src;
	const int first = (index == 0);

	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
		if (first)
			memset(mix, 0, count * sizeof(*mix));
		return;
	}

	if (first)
		copy_scaled_s24_le(mix, stream, count, mix_vol);
	else
		scale_add_clip_s24_le(mix, stream, count, mix_vol);
}
/* Mixes the S24 sample at src into the S24 sample at dst, saturating the
 * result to the signed 24 bit range. The source is multiplied by scaler
 * only when scale is non-zero. */
static inline void mix_stride_sample_s24_le(uint8_t *dst, uint8_t *src,
					    int scale, float scaler)
{
	int32_t addend = *(int32_t *)src;
	int32_t sum;

	if (scale) {
		/* scale_s24_le() hands back the sample masked to 24 bits.
		 * Restore its sign so that a negative sample is not added as
		 * a large positive value and clipped to the maximum. */
		addend = scale_s24_le(addend, scaler) & 0x00ffffff;
		addend = (addend ^ 0x00800000) - 0x00800000;
	}

	sum = *(int32_t *)dst + addend;
	if (sum > 0x007fffff)
		sum = 0x007fffff;
	else if (sum < (int32_t)0xff800000)
		sum = (int32_t)0xff800000;
	*(int32_t *)dst = sum;
}

/* Mixes count S24 samples from src into dst, where consecutive samples are
 * dst_stride and src_stride bytes apart. The clip limits are those of sign
 * extended 24 bit samples.
 * Args:
 *    dst - first destination sample.
 *    src - first source sample.
 *    dst_stride - distance in bytes between destination samples.
 *    src_stride - distance in bytes between source samples.
 *    count - number of samples to mix.
 *    scaler - multiplier for the source; skipped when need_to_scale()
 *        reports it as close to 1.0.
 */
static void cras_mix_add_scale_stride_s24_le(uint8_t *dst, uint8_t *src,
					     unsigned int dst_stride,
					     unsigned int src_stride,
					     unsigned int count, float scaler)
{
	const int scale = need_to_scale(scaler);
	unsigned int i;

	/* A constant stride gets its own loop to help vectorization. */
	if (dst_stride == src_stride && dst_stride == 4) {
		for (i = 0; i < count; i++, dst += 4, src += 4)
			mix_stride_sample_s24_le(dst, src, scale, scaler);
		return;
	}

	for (i = 0; i < count; i++, dst += dst_stride, src += src_stride)
		mix_stride_sample_s24_le(dst, src, scale, scaler);
}
/*
* Signed 32 bit little endian functions.
*/
/* Adds count S32 samples of src into dst. Each sum is formed in 64 bits and
 * saturated to the [INT32_MIN, INT32_MAX] range. */
static void cras_mix_add_clip_s32_le(int32_t *dst, const int32_t *src,
				     size_t count)
{
	const int32_t *end = src + count;

	for (; src != end; src++, dst++) {
		int64_t mixed = (int64_t)*dst + (int64_t)*src;

		if (mixed < INT32_MIN)
			mixed = INT32_MIN;
		else if (mixed > INT32_MAX)
			mixed = INT32_MAX;
		*dst = mixed;
	}
}
/* Mixes src into dst after multiplying each source sample by vol.
 * The sum is formed in 64 bits and hard limited to the S32 range; no softer
 * limiting is done. A vol above MAX_VOLUME_TO_SCALE is treated as unity and
 * the samples are mixed without scaling. */
static void scale_add_clip_s32_le(int32_t *dst, const int32_t *src,
				  size_t count, float vol)
{
	size_t n;

	if (vol > MAX_VOLUME_TO_SCALE) {
		cras_mix_add_clip_s32_le(dst, src, count);
		return;
	}

	for (n = 0; n < count; n++) {
		int64_t mixed = (int64_t)dst[n] + (int64_t)(src[n] * vol);

		if (mixed < INT32_MIN)
			mixed = INT32_MIN;
		else if (mixed > INT32_MAX)
			mixed = INT32_MAX;
		dst[n] = mixed;
	}
}
/* Writes the first stream of a mix: dst is overwritten with the scaled
 * source instead of being accumulated into.
 * Args:
 *    dst - destination S32 samples.
 *    src - source S32 samples.
 *    count - number of samples to write.
 *    volume_scaler - multiplier applied to every sample. Above
 *        MAX_VOLUME_TO_SCALE the samples are copied with memcpy.
 */
static void copy_scaled_s32_le(int32_t *dst, const int32_t *src, size_t count,
			       float volume_scaler)
{
	/* Same type as count, so a large buffer cannot overflow the index. */
	size_t i;

	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
		memcpy(dst, src, count * sizeof(*src));
		return;
	}

	for (i = 0; i < count; i++)
		dst[i] = src[i] * volume_scaler;
}
/* Scales S32 samples in place with a scaler that ramps towards a target.
 * Samples are processed in groups of step; all samples of a group share one
 * scaler and the scaler advances by increment after each group. The applied
 * scaler never goes past target in the direction of increment. Samples
 * after the last complete group are left untouched.
 * Args:
 *    buffer - samples to scale in place.
 *    count - number of samples in buffer.
 *    scaler - scaler applied to the first group.
 *    increment - amount added to scaler after each group.
 *    target - limit for the applied scaler.
 *    step - number of samples per group. A non-positive step scales
 *        nothing.
 */
static void cras_scale_buffer_inc_s32_le(uint8_t *buffer, unsigned int count,
					 float scaler, float increment,
					 float target, int step)
{
	int32_t *out = (int32_t *)buffer;
	unsigned int group;
	unsigned int i = 0;
	unsigned int j;

	/* Already silent and ramping down: the whole buffer is silence. */
	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
		memset(out, 0, count * sizeof(*out));
		return;
	}

	/* A zero step would never advance through the buffer. */
	if (step <= 0)
		return;
	group = (unsigned int)step;

	/* i never exceeds count, so the subtraction cannot wrap. */
	while (count - i >= group) {
		float applied_scaler = scaler;

		if ((applied_scaler > target && increment > 0) ||
		    (applied_scaler < target && increment < 0))
			applied_scaler = target;

		for (j = 0; j < group; j++, i++) {
			/* Unity: the sample is kept as it is. */
			if (applied_scaler > MAX_VOLUME_TO_SCALE)
				continue;
			if (applied_scaler < MIN_VOLUME_TO_SCALE)
				out[i] = 0;
			else
				out[i] *= applied_scaler;
		}
		scaler += increment;
	}
}
/* Scales count S32 samples in place by a constant scaler.
 * Args:
 *    buffer - samples to scale in place.
 *    count - number of samples in buffer.
 *    scaler - multiplier. Above MAX_VOLUME_TO_SCALE the buffer is left
 *        untouched; below MIN_VOLUME_TO_SCALE it is zeroed.
 */
static void cras_scale_buffer_s32_le(uint8_t *buffer, unsigned int count,
				     float scaler)
{
	int32_t *out = (int32_t *)buffer;
	/* Unsigned like count, so the comparison below is never mixed-sign. */
	unsigned int i;

	if (scaler > MAX_VOLUME_TO_SCALE)
		return;

	if (scaler < MIN_VOLUME_TO_SCALE) {
		memset(out, 0, count * sizeof(*out));
		return;
	}

	for (i = 0; i < count; i++)
		out[i] *= scaler;
}
/* Adds one stream of S32 samples to a mix buffer.
 * Args:
 *    dst - mix buffer.
 *    src - samples of the stream being added.
 *    count - number of samples.
 *    index - position of the stream in the mix; for index 0 the mix buffer
 *        is overwritten, otherwise the stream is summed into it.
 *    mute - non-zero if the stream is muted.
 *    mix_vol - scaler applied to the stream.
 * A muted stream, or one with mix_vol below MIN_VOLUME_TO_SCALE, adds
 * nothing; if it is the stream at index 0 the mix buffer is zeroed.
 */
static void cras_mix_add_s32_le(uint8_t *dst, uint8_t *src, unsigned int count,
				unsigned int index, int mute, float mix_vol)
{
	int32_t *mix = (int32_t *)dst;
	int32_t *stream = (int32_t *)src;
	const int first = (index == 0);

	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
		if (first)
			memset(mix, 0, count * sizeof(*mix));
		return;
	}

	if (first)
		copy_scaled_s32_le(mix, stream, count, mix_vol);
	else
		scale_add_clip_s32_le(mix, stream, count, mix_vol);
}
/* Mixes the S32 sample at src into the S32 sample at dst, saturating the
 * result to the S32 range. The source is multiplied by scaler only when
 * scale is non-zero. */
static inline void mix_stride_sample_s32_le(uint8_t *dst, uint8_t *src,
					    int scale, float scaler)
{
	const int32_t in = *(int32_t *)src;
	/* Accumulate in 64 bits: adding two int32_t values directly can
	 * overflow before the clip test is reached, and adding in float
	 * drops the low bits of the destination sample. */
	int64_t sum = *(int32_t *)dst;

	if (scale)
		sum += (int64_t)(in * scaler);
	else
		sum += in;

	if (sum > INT32_MAX)
		sum = INT32_MAX;
	else if (sum < INT32_MIN)
		sum = INT32_MIN;
	*(int32_t *)dst = (int32_t)sum;
}

/* Mixes count S32 samples from src into dst, where consecutive samples are
 * dst_stride and src_stride bytes apart.
 * Args:
 *    dst - first destination sample.
 *    src - first source sample.
 *    dst_stride - distance in bytes between destination samples.
 *    src_stride - distance in bytes between source samples.
 *    count - number of samples to mix.
 *    scaler - multiplier for the source; skipped when need_to_scale()
 *        reports it as close to 1.0.
 */
static void cras_mix_add_scale_stride_s32_le(uint8_t *dst, uint8_t *src,
					     unsigned int dst_stride,
					     unsigned int src_stride,
					     unsigned int count, float scaler)
{
	const int scale = need_to_scale(scaler);
	unsigned int i;

	/* A constant stride gets its own loop to help vectorization. */
	if (dst_stride == src_stride && dst_stride == 4) {
		for (i = 0; i < count; i++, dst += 4, src += 4)
			mix_stride_sample_s32_le(dst, src, scale, scaler);
		return;
	}

	for (i = 0; i < count; i++, dst += dst_stride, src += src_stride)
		mix_stride_sample_s32_le(dst, src, scale, scaler);
}
/*
* Signed 24 bit little endian in three bytes functions.
*/
/* Helper that expands one packed three byte sample into a 32 bit word.
 * The first byte in memory of *dst is cleared and the three bytes at src
 * are placed in the remaining three bytes, in the same order. */
static inline void convert_single_s243le_to_s32le(int32_t *dst,
						  const uint8_t *src)
{
	uint8_t *word = (uint8_t *)dst;

	word[0] = 0;
	word[1] = src[0];
	word[2] = src[1];
	word[3] = src[2];
}
/* Helper that packs a 32 bit word into a three byte sample.
 * The first byte in memory of *src is dropped and the remaining three bytes
 * are written to dst in the same order. */
static inline void convert_single_s32le_to_s243le(uint8_t *dst,
						  const int32_t *src)
{
	const uint8_t *word = (const uint8_t *)src;

	dst[0] = word[1];
	dst[1] = word[2];
	dst[2] = word[3];
}
/* Adds count packed three byte samples of src into dst.
 * Each pair of samples is expanded to 32 bit words, summed in 64 bits,
 * saturated to the [INT32_MIN, INT32_MAX] range and packed back into dst. */
static void cras_mix_add_clip_s24_3le(uint8_t *dst, const uint8_t *src,
				      size_t count)
{
	size_t remaining;

	for (remaining = count; remaining > 0; remaining--) {
		int32_t mix_frame;
		int32_t in_frame;
		int64_t total;

		convert_single_s243le_to_s32le(&mix_frame, dst);
		convert_single_s243le_to_s32le(&in_frame, src);

		total = (int64_t)mix_frame + (int64_t)in_frame;
		if (total < INT32_MIN)
			total = INT32_MIN;
		else if (total > INT32_MAX)
			total = INT32_MAX;

		mix_frame = (int32_t)total;
		convert_single_s32le_to_s243le(dst, &mix_frame);

		dst += 3;
		src += 3;
	}
}
/* Mixes packed three byte samples of src into dst after multiplying each
 * source sample by vol.
 * Samples are expanded to 32 bit words, summed in 64 bits and hard limited
 * to the [INT32_MIN, INT32_MAX] range before being packed back; no softer
 * limiting is done. A vol above MAX_VOLUME_TO_SCALE is treated as unity and
 * the samples are mixed without scaling. */
static void scale_add_clip_s24_3le(uint8_t *dst, const uint8_t *src,
				   size_t count, float vol)
{
	size_t remaining;

	if (vol > MAX_VOLUME_TO_SCALE) {
		cras_mix_add_clip_s24_3le(dst, src, count);
		return;
	}

	for (remaining = count; remaining > 0; remaining--) {
		int32_t mix_frame;
		int32_t in_frame;
		int64_t total;

		convert_single_s243le_to_s32le(&mix_frame, dst);
		convert_single_s243le_to_s32le(&in_frame, src);

		total = (int64_t)mix_frame + (int64_t)(in_frame * vol);
		if (total < INT32_MIN)
			total = INT32_MIN;
		else if (total > INT32_MAX)
			total = INT32_MAX;

		mix_frame = (int32_t)total;
		convert_single_s32le_to_s243le(dst, &mix_frame);

		dst += 3;
		src += 3;
	}
}
/* Writes the first stream of a mix: dst is overwritten with the scaled
 * source instead of being accumulated into.
 * count is the number of packed three byte samples. A volume_scaler above
 * MAX_VOLUME_TO_SCALE is treated as unity and the bytes are copied with
 * memcpy. */
static void copy_scaled_s24_3le(uint8_t *dst, const uint8_t *src, size_t count,
				float volume_scaler)
{
	size_t n = 0;

	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
		memcpy(dst, src, 3 * count * sizeof(*src));
		return;
	}

	while (n < count) {
		int32_t sample;

		convert_single_s243le_to_s32le(&sample, src);
		sample *= volume_scaler;
		convert_single_s32le_to_s243le(dst, &sample);

		dst += 3;
		src += 3;
		n++;
	}
}
/* Scales packed three byte samples in place with a scaler that ramps
 * towards a target.
 * Samples are processed in groups of step; all samples of a group share one
 * scaler and the scaler advances by increment after each group. The applied
 * scaler never goes past target in the direction of increment. Samples
 * after the last complete group are left untouched.
 * Args:
 *    buffer - samples to scale in place, three bytes each.
 *    count - number of samples in buffer.
 *    scaler - scaler applied to the first group.
 *    increment - amount added to scaler after each group.
 *    target - limit for the applied scaler.
 *    step - number of samples per group. A non-positive step scales
 *        nothing.
 */
static void cras_scale_buffer_inc_s24_3le(uint8_t *buffer, unsigned int count,
					  float scaler, float increment,
					  float target, int step)
{
	int32_t frame;
	unsigned int group;
	unsigned int i = 0;
	unsigned int j;

	/* Already silent and ramping down: the whole buffer is silence. */
	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
		memset(buffer, 0, 3 * count * sizeof(*buffer));
		return;
	}

	/* A zero step would never advance through the buffer. */
	if (step <= 0)
		return;
	group = (unsigned int)step;

	/* i never exceeds count, so the subtraction cannot wrap. */
	while (count - i >= group) {
		float applied_scaler = scaler;

		if ((applied_scaler > target && increment > 0) ||
		    (applied_scaler < target && increment < 0))
			applied_scaler = target;

		for (j = 0; j < group; j++, i++, buffer += 3) {
			/* Unity: the sample bytes are kept as they are. */
			if (applied_scaler > MAX_VOLUME_TO_SCALE)
				continue;

			convert_single_s243le_to_s32le(&frame, buffer);
			if (applied_scaler < MIN_VOLUME_TO_SCALE)
				frame = 0;
			else
				frame *= applied_scaler;
			convert_single_s32le_to_s243le(buffer, &frame);
		}
		scaler += increment;
	}
}
/* Scales count packed three byte samples in place by a constant scaler.
 * Args:
 *    buffer - samples to scale in place, three bytes each.
 *    count - number of samples in buffer.
 *    scaler - multiplier. Above MAX_VOLUME_TO_SCALE the buffer is left
 *        untouched; below MIN_VOLUME_TO_SCALE it is zeroed.
 */
static void cras_scale_buffer_s24_3le(uint8_t *buffer, unsigned int count,
				      float scaler)
{
	int32_t frame;
	/* Unsigned like count, so the comparison below is never mixed-sign. */
	unsigned int i;

	if (scaler > MAX_VOLUME_TO_SCALE)
		return;

	if (scaler < MIN_VOLUME_TO_SCALE) {
		memset(buffer, 0, 3 * count * sizeof(*buffer));
		return;
	}

	for (i = 0; i < count; i++, buffer += 3) {
		convert_single_s243le_to_s32le(&frame, buffer);
		frame *= scaler;
		convert_single_s32le_to_s243le(buffer, &frame);
	}
}
/* Adds one stream of packed three byte samples to a mix buffer.
 * Args:
 *    dst - mix buffer.
 *    src - samples of the stream being added.
 *    count - number of samples, three bytes each.
 *    index - position of the stream in the mix; for index 0 the mix buffer
 *        is overwritten, otherwise the stream is summed into it.
 *    mute - non-zero if the stream is muted.
 *    mix_vol - scaler applied to the stream.
 * A muted stream, or one with mix_vol below MIN_VOLUME_TO_SCALE, adds
 * nothing; if it is the stream at index 0 the mix buffer is zeroed.
 */
static void cras_mix_add_s24_3le(uint8_t *dst, uint8_t *src, unsigned int count,
				 unsigned int index, int mute, float mix_vol)
{
	const int first = (index == 0);

	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
		if (first)
			memset(dst, 0, 3 * count * sizeof(*dst));
		return;
	}

	if (first)
		copy_scaled_s24_3le(dst, src, count, mix_vol);
	else
		scale_add_clip_s24_3le(dst, src, count, mix_vol);
}
/* Mixes count packed three byte samples from src into dst, where
 * consecutive samples are dst_stride and src_stride bytes apart.
 * Samples are expanded to 32 bit words, summed in 64 bits, saturated to the
 * [INT32_MIN, INT32_MAX] range and packed back into dst.
 * Args:
 *    dst - first destination sample.
 *    src - first source sample.
 *    dst_stride - distance in bytes between destination samples.
 *    src_stride - distance in bytes between source samples.
 *    count - number of samples to mix.
 *    scaler - multiplier for the source; skipped when need_to_scale()
 *        reports it as close to 1.0.
 */
static void cras_mix_add_scale_stride_s24_3le(uint8_t *dst, uint8_t *src,
					      unsigned int dst_stride,
					      unsigned int src_stride,
					      unsigned int count, float scaler)
{
	const int scale = need_to_scale(scaler);
	unsigned int i;
	int64_t sum;
	int32_t dst_frame;
	int32_t src_frame;

	for (i = 0; i < count; i++) {
		convert_single_s243le_to_s32le(&dst_frame, dst);
		convert_single_s243le_to_s32le(&src_frame, src);

		/* Convert the scaled source to an integer before adding, as
		 * scale_add_clip_s24_3le() does. Adding in float would round
		 * the sum to the float mantissa and could change the least
		 * significant bit of the stored sample. */
		if (scale)
			sum = (int64_t)dst_frame +
			      (int64_t)(src_frame * scaler);
		else
			sum = (int64_t)dst_frame + (int64_t)src_frame;

		if (sum > INT32_MAX)
			sum = INT32_MAX;
		else if (sum < INT32_MIN)
			sum = INT32_MIN;

		dst_frame = (int32_t)sum;
		convert_single_s32le_to_s243le(dst, &dst_frame);
		dst += dst_stride;
		src += src_stride;
	}
}
/* Dispatches a ramped in-place scale to the implementation for fmt.
 * All other arguments are forwarded unchanged. Formats other than S16_LE,
 * S24_LE, S32_LE and S24_3LE are ignored. */
static void scale_buffer_increment(snd_pcm_format_t fmt, uint8_t *buff,
				   unsigned int count, float scaler,
				   float increment, float target, int step)
{
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_scale_buffer_inc_s16_le(buff, count, scaler, increment,
					     target, step);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_scale_buffer_inc_s24_le(buff, count, scaler, increment,
					     target, step);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_scale_buffer_inc_s32_le(buff, count, scaler, increment,
					     target, step);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_scale_buffer_inc_s24_3le(buff, count, scaler, increment,
					      target, step);
		break;
	default:
		/* Unsupported format: nothing is done. */
		break;
	}
}
/* Dispatches a constant in-place scale to the implementation for fmt.
 * All other arguments are forwarded unchanged. Formats other than S16_LE,
 * S24_LE, S32_LE and S24_3LE are ignored. */
static void scale_buffer(snd_pcm_format_t fmt, uint8_t *buff,
			 unsigned int count, float scaler)
{
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_scale_buffer_s16_le(buff, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_scale_buffer_s24_le(buff, count, scaler);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_scale_buffer_s32_le(buff, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_scale_buffer_s24_3le(buff, count, scaler);
		break;
	default:
		/* Unsupported format: nothing is done. */
		break;
	}
}
/* Dispatches the addition of a stream to a mix buffer to the implementation
 * for fmt. All other arguments are forwarded unchanged. Formats other than
 * S16_LE, S24_LE, S32_LE and S24_3LE are ignored. */
static void mix_add(snd_pcm_format_t fmt, uint8_t *dst, uint8_t *src,
		    unsigned int count, unsigned int index, int mute,
		    float mix_vol)
{
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_mix_add_s16_le(dst, src, count, index, mute, mix_vol);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_mix_add_s24_le(dst, src, count, index, mute, mix_vol);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_mix_add_s32_le(dst, src, count, index, mute, mix_vol);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_mix_add_s24_3le(dst, src, count, index, mute, mix_vol);
		break;
	default:
		/* Unsupported format: nothing is done. */
		break;
	}
}
/* Dispatches a strided scale-and-mix to the implementation for fmt.
 * Note that count precedes the strides here but follows them in the
 * format-specific functions. Formats other than S16_LE, S24_LE, S32_LE and
 * S24_3LE are ignored. */
static void mix_add_scale_stride(snd_pcm_format_t fmt, uint8_t *dst,
				 uint8_t *src, unsigned int count,
				 unsigned int dst_stride,
				 unsigned int src_stride, float scaler)
{
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_mix_add_scale_stride_s16_le(dst, src, dst_stride,
						 src_stride, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_mix_add_scale_stride_s24_le(dst, src, dst_stride,
						 src_stride, count, scaler);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_mix_add_scale_stride_s32_le(dst, src, dst_stride,
						 src_stride, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_mix_add_scale_stride_s24_3le(dst, src, dst_stride,
						  src_stride, count, scaler);
		break;
	default:
		/* Unsupported format: nothing is done. */
		break;
	}
}
/* Zeroes count frames of frame_bytes bytes each, starting at dst.
 * Returns count. */
static size_t mix_mute_buffer(uint8_t *dst, size_t frame_bytes, size_t count)
{
	const size_t total_bytes = count * frame_bytes;

	memset(dst, 0, total_bytes);
	return count;
}
/* Table of the mixing operations implemented in this file.
 * The symbol is named through OPS(), so it carries the suffix selected for
 * this compilation. Each entry points at the format dispatcher of the same
 * purpose defined above. */
const struct cras_mix_ops OPS(mixer_ops) = {
.scale_buffer = scale_buffer,
.scale_buffer_increment = scale_buffer_increment,
.add = mix_add,
.add_scale_stride = mix_add_scale_stride,
.mute_buffer = mix_mute_buffer,
};