blob: 44f72575ebc63bdaf876c076ef3d59bbcf24a44c [file] [log] [blame]
/* Copyright 2016 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include <math.h> /* for abs() */
#include <stdio.h> /* for printf() */
#include <string.h> /* for memset() */
#include <stdint.h> /* for uint64 definition */
#include <stdlib.h> /* for exit() definition */
#include <time.h> /* for clock_gettime */
#include "../drc_math.h"
#include "../dsp_util.h"
/* Nanoseconds per second; used when converting timespec deltas to ms. */
#define BILLION 1000000000LL
/* Number of iterations for performance testing. */
#define ITERATIONS 400000
#if defined(__aarch64__)
/*
 * Converts a float to a 16-bit sample with AArch64 SIMD instructions:
 * fcvtas converts to a signed 32-bit integer (round to nearest, ties away
 * from zero) and sqxtn narrows that result to 16 bits with signed
 * saturation.
 */
int16_t float_to_short(float a)
{
	int32_t narrowed;

	asm volatile("fcvtas %s[out], %s[a]\n"
		     "sqxtn %h[out], %s[out]\n"
		     : [out] "=w"(narrowed)
		     : [a] "w"(a)
		     :);
	return (int16_t)(narrowed);
}
#else
/*
 * Portable fallback: pushes the value half a unit away from zero, clamps it
 * to [-32768, 32767] and then truncates to int16_t.
 */
int16_t float_to_short(float a)
{
	if (a >= 0)
		a += 0.5f;
	else
		a -= 0.5f;
	return (int16_t)(max(-32768, min(32767, a)));
}
#endif
/*
 * Reference (plain C) deinterleave of signed 16-bit samples into per-channel
 * float buffers.
 *
 * input:    interleaved samples, frames * channels entries, frame-major.
 * output:   array of `channels` pointers; output[c] receives `frames` floats.
 * channels: number of channels; a value <= 0 writes nothing.
 * frames:   number of frames; a value <= 0 writes nothing.
 *
 * Each sample is divided by 32768.0f.  The output buffers are indexed
 * directly instead of through a variable-length array of cursors, so a
 * non-positive channel count is a harmless no-op rather than an invalid
 * VLA size.
 */
void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
				     int channels, int frames)
{
	const int16_t *src = input;
	int frame, ch;

	for (frame = 0; frame < frames; frame++)
		for (ch = 0; ch < channels; ch++)
			output[ch][frame] = *src++ / 32768.0f;
}
/*
 * Reference (plain C) interleave of per-channel float buffers into signed
 * 16-bit samples.
 *
 * input:    array of `channels` pointers; input[c] supplies `frames` floats.
 * output:   receives frames * channels interleaved samples, frame-major.
 * channels: number of channels; a value <= 0 writes nothing.
 * frames:   number of frames; a value <= 0 writes nothing.
 *
 * Each float is multiplied by 32768.0f and converted with float_to_short().
 * The input buffers are indexed directly instead of through a
 * variable-length array of cursors, so a non-positive channel count is a
 * harmless no-op rather than an invalid VLA size.
 */
void dsp_util_interleave_reference(float *const *input, int16_t *output,
				   int channels, int frames)
{
	int16_t *dst = output;
	int frame, ch;

	for (frame = 0; frame < frames; frame++)
		for (ch = 0; ch < channels; ch++) {
			float scaled = input[ch][frame] * 32768.0f;

			*dst++ = float_to_short(scaled);
		}
}
/* Use fixed size allocation to avoid performance fluctuation of allocation. */
#define MAXSAMPLES 4096
#define MINSAMPLES 256
/* PAD buffer to check for overflows. */
#define PAD 4096
/*
 * Runs one rounding/clamping case through the reference and the optimized
 * stereo converters and prints how their outputs compare.
 *
 * in:       float value written to every input sample of both channels.
 * expected: 16-bit value the optimized interleave is expected to produce for
 *           `in`; only used to label a mismatch as expected or unexpected.
 * samples:  number of frames to convert.  Only MAXSAMPLES input frames are
 *           initialized, so callers are expected to stay at or below that.
 *
 * Output goes to stdout.  Exits with status 1 if a scratch buffer cannot be
 * allocated.
 */
void TestRounding(float in, int16_t expected, int samples)
{
	/* Two interleaved channels of 16-bit samples plus guard padding. */
	const size_t shorts_bytes = MAXSAMPLES * 2 * sizeof(short) + PAD;
	/* One channel of float samples plus guard padding. */
	const size_t floats_bytes = MAXSAMPLES * sizeof(float) + PAD;
	const size_t shorts_count = shorts_bytes / sizeof(short);
	size_t k;
	int i;
	int max_diff;
	int d;

	short *in_shorts = malloc(shorts_bytes);
	float *out_floats_left_c = malloc(floats_bytes);
	float *out_floats_right_c = malloc(floats_bytes);
	float *out_floats_left_opt = malloc(floats_bytes);
	float *out_floats_right_opt = malloc(floats_bytes);
	short *out_shorts_c = malloc(shorts_bytes);
	short *out_shorts_opt = malloc(shorts_bytes);

	if (!in_shorts || !out_floats_left_c || !out_floats_right_c ||
	    !out_floats_left_opt || !out_floats_right_opt || !out_shorts_c ||
	    !out_shorts_opt) {
		fprintf(stderr, "TestRounding: out of memory\n");
		exit(1);
	}

	/*
	 * Fill everything, padding included, with the same byte pattern so
	 * that the full-buffer comparison below also covers bytes the
	 * converters were not supposed to touch.
	 */
	memset(in_shorts, 0xfb, shorts_bytes);
	memset(out_floats_left_c, 0xfb, floats_bytes);
	memset(out_floats_right_c, 0xfb, floats_bytes);
	memset(out_floats_left_opt, 0xfb, floats_bytes);
	memset(out_floats_right_opt, 0xfb, floats_bytes);
	memset(out_shorts_c, 0xfb, shorts_bytes);
	memset(out_shorts_opt, 0xfb, shorts_bytes);

	float *out_floats_ptr_c[2] = { out_floats_left_c, out_floats_right_c };
	float *out_floats_ptr_opt[2] = { out_floats_left_opt,
					 out_floats_right_opt };

	/* The "_c" float buffers double as the input to both interleaves. */
	for (i = 0; i < MAXSAMPLES; ++i) {
		out_floats_left_c[i] = in;
		out_floats_right_c[i] = in;
	}

	/* Reference C interleave. */
	dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2,
				      samples);

	/* Optimized interleave, repeated ITERATIONS times on the same data. */
	for (i = 0; i < ITERATIONS; ++i) {
		dsp_util_interleave(out_floats_ptr_c, (uint8_t *)out_shorts_opt,
				    2, SND_PCM_FORMAT_S16_LE, samples);
	}

	/* Largest per-sample difference over the whole buffer and padding. */
	max_diff = 0;
	for (k = 0; k < shorts_count; ++k) {
		d = abs(out_shorts_c[k] - out_shorts_opt[k]);
		if (d > max_diff) {
			max_diff = d;
		}
	}
	printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n",
	       max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0],
	       expected,
	       max_diff == 0 ? "PASS" :
			       (out_shorts_opt[0] == expected ?
					"EXPECTED DIFFERENCE" :
					"UNEXPECTED DIFFERENCE"));

	/* Reference C deinterleave. */
	dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2,
					samples);

	/* Optimized deinterleave. */
	dsp_util_deinterleave((uint8_t *)in_shorts, out_floats_ptr_opt, 2,
			      SND_PCM_FORMAT_S16_LE, samples);

	/* The two deinterleaves must agree byte-for-byte on each channel. */
	d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
		   samples * sizeof(float));
	if (d)
		printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0],
		       out_floats_ptr_opt[0][0]);
	d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
		   samples * sizeof(float));
	if (d)
		printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0],
		       out_floats_ptr_opt[1][0]);

	free(in_shorts);
	free(out_floats_left_c);
	free(out_floats_right_c);
	free(out_floats_left_opt);
	free(out_floats_right_opt);
	free(out_shorts_c);
	free(out_shorts_opt);
}
/* Returns the time from *start to *end in whole milliseconds. */
static uint64_t timespec_diff_ms(const struct timespec *start,
				 const struct timespec *end)
{
	return (BILLION * (end->tv_sec - start->tv_sec) + end->tv_nsec -
		start->tv_nsec) /
	       1000000;
}

/*
 * Test driver.  First runs a table of clamping, rounding, denormal and NaN
 * cases through TestRounding(), then benchmarks the reference and optimized
 * interleave/deinterleave routines for buffer sizes from MAXSAMPLES down to
 * MINSAMPLES (halving each step) and checks that their outputs match.
 *
 * Command-line arguments are ignored.  Returns 0 on completion, or 1 if the
 * benchmark buffers cannot be allocated.
 */
int main(int argc, char **argv)
{
	/* Small offset used to nudge inputs just off a rounding boundary. */
	float e = 0.000000001f;
	int samples = 16;

	(void)argc;
	(void)argv;

	dsp_enable_flush_denormal_to_zero();

	// Print headings for TestRounding output.
	printf("test interleave compare maxdif, float, float * 32k "
	       "C SIMD expect pass\n");

	// test clamping
	TestRounding(1.0f, 32767, samples);
	TestRounding(-1.0f, -32768, samples);
	TestRounding(1.1f, 32767, samples);
	TestRounding(-1.1f, -32768, samples);
	TestRounding(2000000000.f / 32768.f, 32767, samples);
	TestRounding(-2000000000.f / 32768.f, -32768, samples);

/* Infinity produces zero on arm64. */
#if defined(__aarch64__)
#define EXPECTED_INF_RESULT 0
#define EXPECTED_NEGINF_RESULT 0
#elif defined(__i386__) || defined(__x86_64__)
#define EXPECTED_INF_RESULT -32768
#define EXPECTED_NEGINF_RESULT 0
#else
#define EXPECTED_INF_RESULT 32767
#define EXPECTED_NEGINF_RESULT -32768
#endif
	TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples);
	TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples);

	/*
	 * Test infinity.  The value built here (exponent 0xfe, all mantissa
	 * bits set) is the largest finite float, not an encoded infinity; the
	 * reference interleave multiplies by 32768.0f, which overflows it to
	 * infinity.
	 */
	union ieee754_float inf;
	inf.ieee.negative = 0;
	inf.ieee.exponent = 0xfe;
	inf.ieee.mantissa = 0x7fffff;
	TestRounding(inf.f, EXPECTED_INF_RESULT, samples); // expect fail
	inf.ieee.negative = 1;
	inf.ieee.exponent = 0xfe;
	inf.ieee.mantissa = 0x7fffff;
	TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples); // expect fail

	// test rounding
	TestRounding(0.25f, 8192, samples);
	TestRounding(-0.25f, -8192, samples);
	TestRounding(0.50f, 16384, samples);
	TestRounding(-0.50f, -16384, samples);
	TestRounding(1.0f / 32768.0f, 1, samples);
	TestRounding(-1.0f / 32768.0f, -1, samples);
	TestRounding(1.0f / 32768.0f + e, 1, samples);
	TestRounding(-1.0f / 32768.0f - e, -1, samples);
	TestRounding(1.0f / 32768.0f - e, 1, samples);
	TestRounding(-1.0f / 32768.0f + e, -1, samples);

/* Rounding on 'tie' is different for Intel. */
#if defined(__i386__) || defined(__x86_64__)
	TestRounding(0.5f / 32768.0f, 0, samples); /* Expect round to even */
	TestRounding(-0.5f / 32768.0f, 0, samples);
#else
	TestRounding(0.5f / 32768.0f, 1, samples); /* Expect round away */
	TestRounding(-0.5f / 32768.0f, -1, samples);
#endif
	TestRounding(0.5f / 32768.0f + e, 1, samples);
	/* Just beyond -0.5 LSB rounds to -1, mirroring the positive case. */
	TestRounding(-0.5f / 32768.0f - e, -1, samples);
	TestRounding(0.5f / 32768.0f - e, 0, samples);
	TestRounding(-0.5f / 32768.0f + e, 0, samples);
	TestRounding(1.5f / 32768.0f, 2, samples);
	TestRounding(-1.5f / 32768.0f, -2, samples);
	TestRounding(1.5f / 32768.0f + e, 2, samples);
	TestRounding(-1.5f / 32768.0f - e, -2, samples);
	TestRounding(1.5f / 32768.0f - e, 1, samples);
	TestRounding(-1.5f / 32768.0f + e, -1, samples);

	/* Test denormals: zero exponent with the lowest mantissa bit set. */
	union ieee754_float denorm;
	denorm.ieee.negative = 0;
	denorm.ieee.exponent = 0;
	denorm.ieee.mantissa = 1;
	TestRounding(denorm.f, 0, samples);
	denorm.ieee.negative = 1;
	denorm.ieee.exponent = 0;
	denorm.ieee.mantissa = 1;
	TestRounding(denorm.f, 0, samples);

/* Test NaNs. Caveat Results vary by implementation. */
#if defined(__i386__) || defined(__x86_64__)
#define EXPECTED_NAN_RESULT -32768
#else
#define EXPECTED_NAN_RESULT 0
#endif
	union ieee754_float nan; /* Quiet NaN */
	nan.ieee.negative = 0;
	nan.ieee.exponent = 0xff;
	nan.ieee.mantissa = 0x400001;
	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
	nan.ieee.negative = 0;
	nan.ieee.exponent = 0xff;
	nan.ieee.mantissa = 0x000001; /* Signalling NaN */
	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);

	/* Test Performance */
	/* Two interleaved channels of 16-bit samples plus guard padding. */
	const size_t shorts_bytes = MAXSAMPLES * 2 * sizeof(short) + PAD;
	/* One channel of float samples plus guard padding. */
	const size_t floats_bytes = MAXSAMPLES * sizeof(float) + PAD;
	uint64_t diff;
	struct timespec start, end;
	int i;
	int d;

	short *in_shorts = malloc(shorts_bytes);
	float *out_floats_left_c = malloc(floats_bytes);
	float *out_floats_right_c = malloc(floats_bytes);
	float *out_floats_left_opt = malloc(floats_bytes);
	float *out_floats_right_opt = malloc(floats_bytes);
	short *out_shorts_c = malloc(shorts_bytes);
	short *out_shorts_opt = malloc(shorts_bytes);

	if (!in_shorts || !out_floats_left_c || !out_floats_right_c ||
	    !out_floats_left_opt || !out_floats_right_opt || !out_shorts_c ||
	    !out_shorts_opt) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}

	/*
	 * The two short output buffers share a fill pattern because they are
	 * later compared over their full length, padding included.
	 */
	memset(in_shorts, 0x11, shorts_bytes);
	memset(out_floats_left_c, 0x22, floats_bytes);
	memset(out_floats_right_c, 0x33, floats_bytes);
	memset(out_floats_left_opt, 0x44, floats_bytes);
	memset(out_floats_right_opt, 0x55, floats_bytes);
	memset(out_shorts_c, 0x66, shorts_bytes);
	memset(out_shorts_opt, 0x66, shorts_bytes);

	float *out_floats_ptr_c[2] = { out_floats_left_c, out_floats_right_c };
	float *out_floats_ptr_opt[2] = { out_floats_left_opt,
					 out_floats_right_opt };

	/* Benchmark dsp_util_interleave */
	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
		/* measure original C interleave */
		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
		for (i = 0; i < ITERATIONS; ++i) {
			dsp_util_interleave_reference(out_floats_ptr_c,
						      out_shorts_c, 2, samples);
		}
		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
		diff = timespec_diff_ms(&start, &end);
		printf("interleave ORIG size = %6d, elapsed time = %llu ms\n",
		       samples, (long long unsigned int)diff);

		/* measure optimized interleave (same input buffers) */
		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
		for (i = 0; i < ITERATIONS; ++i) {
			dsp_util_interleave(out_floats_ptr_c,
					    (uint8_t *)out_shorts_opt, 2,
					    SND_PCM_FORMAT_S16_LE, samples);
		}
		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
		diff = timespec_diff_ms(&start, &end);
		printf("interleave SIMD size = %6d, elapsed time = %llu ms\n",
		       samples, (long long unsigned int)diff);

		/* Test C and SIMD output match */
		d = memcmp(out_shorts_c, out_shorts_opt, shorts_bytes);
		if (d)
			printf("interleave compare %d, %d %d, %d %d\n", d,
			       out_shorts_c[0], out_shorts_c[1],
			       out_shorts_opt[0], out_shorts_opt[1]);
	}

	/* Benchmark dsp_util_deinterleave */
	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
		/* Measure original C deinterleave */
		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
		for (i = 0; i < ITERATIONS; ++i) {
			dsp_util_deinterleave_reference(
				in_shorts, out_floats_ptr_c, 2, samples);
		}
		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
		diff = timespec_diff_ms(&start, &end);
		printf("deinterleave ORIG size = %6d, "
		       "elapsed time = %llu ms\n",
		       samples, (long long unsigned int)diff);

		/* Measure optimized deinterleave */
		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
		for (i = 0; i < ITERATIONS; ++i) {
			dsp_util_deinterleave((uint8_t *)in_shorts,
					      out_floats_ptr_opt, 2,
					      SND_PCM_FORMAT_S16_LE, samples);
		}
		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
		diff = timespec_diff_ms(&start, &end);
		printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n",
		       samples, (long long unsigned int)diff);

		/* Test C and SIMD output match */
		d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
			   samples * sizeof(float));
		if (d)
			printf("left compare %d, %f %f\n", d,
			       out_floats_ptr_c[0][0],
			       out_floats_ptr_opt[0][0]);
		d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
			   samples * sizeof(float));
		if (d)
			printf("right compare %d, %f %f\n", d,
			       out_floats_ptr_c[1][0],
			       out_floats_ptr_opt[1][0]);
	}

	free(in_shorts);
	free(out_floats_left_c);
	free(out_floats_right_c);
	free(out_floats_left_opt);
	free(out_floats_right_opt);
	free(out_shorts_c);
	free(out_shorts_opt);
	return 0;
}