blob: 798322e57fda5e425cf3fbc19e9dd3d0415f7870 [file] [log] [blame]
/*
* DctIV.cpp
*
* Copyright 2021 HIMSA II K/S - www.himsa.com. Represented by EHIMA -
* www.ehima.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "DctIV.hpp"
#include <cmath>
/*
* Notes on the choice of the applied FFT library:
* - Different fast transform implementations for the DCT-IV can be selected
* via USE_FFTW USE_FFTW_FOR_FFT // assume NF being 4 times an integer
* USE_KISSFFT // assume NF being 4 times an integer
* where only one of them is meaningful to be defined
*
* - Defining none of the fast transforms will lead to a direct implementation
* which is good for testing but usually to slow for practial applications
*
* - USE_FFTW: The free "fftw" library should be fastest, but may be more
* complicated to be build for embedded target devices. It provides dedicated
* optimization for the DCT-IV and thus is preferred from a technical
* perspective. In terms of licences "fftw" is free in the sense of GPL, which
* is not liked in some products. There might be the option to ask the authors
* for other licences (which they propose). Nevertheless, the "license" issue
* has driven us to not bundle "fftw" directly with the LC3 implementation,
* although the API for using it is provided and tested.
*
* - USE_FFTW_FOR_FFT: this is a not fully optimized alternative approach using
* the complex fft provided by "fftw" instead of the dedicated optimized DCT-IV
* implementation. This option is mainly provided to demonstrate the transition
* from USE_FFTW to USE_KISSFFT
*
* - USE_KISSFFT: the free "kissfft" library is sligthly slower than "fftw",
* but gives a majority of the possible performance gain over a "stupid" direct
* DCT-IV implementation. "kissfft" does not provide a dedicated optimization
* for DCT-IV so that some additional code for implementing a DCT-IV by a
* complex fft has to be added. This also implies additional memory needed. The
* big advantage of the "kissfft" is that it is less restrictive in terms of its
* license. Further, the template based C++ implementation reduces its impact on
* the build system to a minimum (we just need to include the right header
* without needing to compile and link a separate file).
*
* - USE_KISSFFT is defined below to be the default choice. The other
* alternatives may be selecting by providing its define via the compiler switch
* -D<define>
*
* - The impact of the defines is restricted to this *.cpp file for clarity
* reasons. Ok, to make this work, we needed a few "ugly" pointer casts, but we
* really wanted to make sure, that no other code or build impact is generated
* by the options in this file.
*/
#define USE_OWN_FFT
#if defined USE_FFTW || defined USE_FFTW_FOR_FFT
#include <fftw3.h>
#elif defined USE_KISSFFT
#include "kissfft.hh"
class KissfftConfig {
public:
KissfftConfig(uint16_t N) : inbuf(nullptr), outbuf(nullptr), fft(nullptr) {
inbuf = new std::complex<double>[N];
outbuf = new std::complex<double>[N];
twiddle = new std::complex<double>[N];
fft = new kissfft<double>(N, false);
const double pi = std::acos(-1);
for (uint16_t n = 0; n < N; n++) {
twiddle[n] =
std::complex<double>(std::cos(-pi * (8 * n + 1) / (8.0 * N * 2)),
std::sin(-pi * (8 * n + 1) / (8.0 * N * 2)));
}
}
~KissfftConfig() {
if (nullptr != fft) {
delete fft;
}
if (nullptr != inbuf) {
delete[] inbuf;
}
if (nullptr != outbuf) {
delete[] outbuf;
}
if (nullptr != twiddle) {
delete[] twiddle;
}
}
void transform() {
if (nullptr != fft) {
fft->transform(inbuf, outbuf);
}
}
std::complex<double>* inbuf;
std::complex<double>* outbuf;
std::complex<double>* twiddle;
kissfft<double>* fft;
};
#elif defined USE_OWN_FFT
#include <complex>
#include "fft.h"
#endif
DctIVDbl::DctIVDbl(uint16_t NF_)
: NF(NF_), in(nullptr), out(nullptr), dctIVconfig(nullptr) {
in = new double[NF];
out = new double[NF];
#if defined USE_FFTW
dctIVconfig = fftw_plan_r2r_1d(NF, in, out, FFTW_REDFT11, FFTW_ESTIMATE);
#elif defined USE_FFTW_FOR_FFT
dctIVconfig = fftw_plan_dft_1d(NF / 2, reinterpret_cast<fftw_complex*>(in),
reinterpret_cast<fftw_complex*>(out),
FFTW_FORWARD, FFTW_ESTIMATE);
#elif defined USE_KISSFFT
dctIVconfig = new KissfftConfig(NF / 2);
#elif defined USE_OWN_FFT
int N = NF / 2;
std::complex<double>* twiddle = new std::complex<double>[N];
dctIVconfig = twiddle;
const double pi = std::acos(-1);
for (uint16_t n = 0; n < N; n++) {
twiddle[n] =
std::complex<double>(std::cos(-pi * (8 * n + 1) / (8.0 * N * 2)),
std::sin(-pi * (8 * n + 1) / (8.0 * N * 2)));
}
#endif
for (uint16_t n = 0; n < NF; n++) {
in[n] = 0;
out[n] = 0;
}
}
DctIVDbl::~DctIVDbl() {
#if defined USE_FFTW || defined USE_FFTW_FOR_FFT
if (nullptr != dctIVconfig) {
fftw_destroy_plan(reinterpret_cast<fftw_plan>(dctIVconfig));
}
#elif defined USE_KISSFFT
if (nullptr != dctIVconfig) {
KissfftConfig* kissfftConfig =
reinterpret_cast<KissfftConfig*>(dctIVconfig);
delete kissfftConfig;
}
#elif defined USE_OWN_FFT
std::complex<double>* twiddle = (std::complex<double>*)dctIVconfig;
delete[] twiddle;
#endif
if (nullptr != in) {
delete[] in;
}
if (nullptr != out) {
delete[] out;
}
}
void DctIVDirectDbl(uint16_t N, const double* const tw, double* const X) {
const double pi = std::acos(-1);
for (uint16_t k = 0; k < N; k++) {
X[k] = 0;
for (uint16_t n = 0; n < N; n++) {
X[k] += tw[n] * std::cos(pi / N * (n + 0.5) * (k + 0.5));
}
X[k] *= 2;
}
}
void DctIVDbl::run() {
#ifdef USE_FFTW
fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));
#elif defined USE_FFTW_FOR_FFT
const double pi = std::acos(-1);
// assume NF being 4 times an integer
for (uint16_t n = 1; n < NF / 2; n += 2) {
double buffer;
buffer = in[n];
in[n] = in[NF - n];
in[NF - n] = buffer;
}
for (uint16_t n = 0; n < NF; n += 2) {
double real = in[n + 0];
double imag = in[n + 1];
in[n + 0] = real * std::cos(-pi * (4 * n + 1) / (8.0 * NF)) -
imag * std::sin(-pi * (4 * n + 1) / (8.0 * NF));
in[n + 1] = real * std::sin(-pi * (4 * n + 1) / (8.0 * NF)) +
imag * std::cos(-pi * (4 * n + 1) / (8.0 * NF));
}
fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));
for (uint16_t n = 0; n < NF; n += 2) {
double real = out[n + 0];
double imag = out[n + 1];
out[n + 0] = 2 * (real * std::cos(-pi * (4 * n + 1) / (8.0 * NF)) -
imag * std::sin(-pi * (4 * n + 1) / (8.0 * NF)));
out[n + 1] = 2 * (real * std::sin(-pi * (4 * n + 1) / (8.0 * NF)) +
imag * std::cos(-pi * (4 * n + 1) / (8.0 * NF)));
}
for (uint16_t n = 1; n < NF / 2; n += 2) {
double buffer;
buffer = out[n];
out[n] = -out[NF - n];
out[NF - n] = -buffer;
}
#elif defined USE_KISSFFT
// assume NF being 4 times an integer
KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
for (uint16_t n = 0; n < NF / 2; n++) {
kissfftConfig->inbuf[n] =
kissfftConfig->twiddle[n] *
std::complex<double>(in[2 * n], in[NF - 2 * n - 1]);
}
kissfftConfig->transform();
for (uint16_t n = 0; n < NF / 2; n++) {
std::complex<double> complexOut =
kissfftConfig->twiddle[n] * kissfftConfig->outbuf[n];
out[2 * n] = complexOut.real() * 2;
out[NF - 2 * n - 1] = -complexOut.imag() * 2;
}
#elif defined USE_OWN_FFT
fft_complex inbuf[NF / 2];
fft_complex outbuf[NF / 2];
// assume NF being 4 times an integer
for (uint16_t n = 1; n < NF / 2; n += 2) {
double buffer;
buffer = in[n];
in[n] = in[NF - n];
in[NF - n] = buffer;
}
std::complex<double>* twiddle = (std::complex<double>*)dctIVconfig;
for (uint16_t n = 0; n < NF / 2; n++) {
double real = in[2 * n + 0];
double imag = in[2 * n + 1];
in[2 * n + 0] = real * twiddle[n].real() - imag * twiddle[n].imag();
in[2 * n + 1] = real * twiddle[n].imag() + imag * twiddle[n].real();
}
for (uint16_t n = 0; n < NF / 2; n++) {
inbuf[n].re = in[2 * n];
inbuf[n].im = in[2 * n + 1];
}
fft_complex* actal_output = fft(false, inbuf, NF / 2, inbuf, outbuf);
for (uint16_t n = 0; n < NF / 2; n++) {
double real = actal_output[n].re;
double imag = actal_output[n].im;
out[2 * n + 0] = 2 * (real * twiddle[n].real() - imag * twiddle[n].imag());
out[2 * n + 1] = 2 * (real * twiddle[n].imag() + imag * twiddle[n].real());
}
for (uint16_t n = 1; n < NF / 2; n += 2) {
double buffer;
buffer = out[n];
out[n] = -out[NF - n];
out[NF - n] = -buffer;
}
#else
DctIVDirectDbl(NF, in, out);
#endif
}