embdrv/lc3/Common/DctIV.cpp - platform/system/bt - Git at Google

 /*
  * DctIV.cpp
  *
  * Copyright 2021 HIMSA II K/S - www.himsa.com. Represented by EHIMA -
  * www.ehima.com
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "DctIV.hpp"

 #include <cmath>

 /*
  * Notes on the choice of the applied FFT library:
  *  - Different fast transform implementations for the DCT-IV can be selected
  * via USE_FFTW USE_FFTW_FOR_FFT  // assume NF being 4 times an integer
  *      USE_KISSFFT       // assume NF being 4 times an integer
  *     where only one of them is meaningful to be defined
  *
  *  - Defining none of the fast transforms will lead to a direct implementation
  *     which is good for testing but usually to slow for practial applications
  *
  *  - USE_FFTW: The free "fftw" library should be fastest, but may be more
  * complicated to be build for embedded target devices. It provides dedicated
  * optimization for the DCT-IV and thus is preferred from a technical
  * perspective. In terms of licences "fftw" is free in the sense of GPL, which
  * is not liked in some products. There might be the option to ask the authors
  * for other licences (which they propose). Nevertheless, the "license" issue
  *     has driven us to not bundle "fftw" directly with the LC3 implementation,
  * although the API for using it is provided and tested.
  *
  *  - USE_FFTW_FOR_FFT: this is a not fully optimized alternative approach using
  * the complex fft provided by "fftw" instead of the dedicated optimized DCT-IV
  * implementation. This option is mainly provided to demonstrate the transition
  * from USE_FFTW to USE_KISSFFT
  *
  *  - USE_KISSFFT: the free "kissfft" library is sligthly slower than "fftw",
  * but gives a majority of the possible performance gain over a "stupid" direct
  * DCT-IV implementation. "kissfft" does not provide a dedicated optimization
  * for DCT-IV so that some additional code for implementing a DCT-IV by a
  * complex fft has to be added. This also implies additional memory needed. The
  * big advantage of the "kissfft" is that it is less restrictive in terms of its
  * license. Further, the template based C++ implementation reduces its impact on
  * the build system to a minimum (we just need to include the right header
  * without needing to compile and link a separate file).
  *
  *   - USE_KISSFFT is defined below to be the default choice. The other
  * alternatives may be selecting by providing its define via the compiler switch
  * -D<define>
  *
  *   - The impact of the defines is restricted to this *.cpp file for clarity
  * reasons. Ok, to make this work, we needed a few "ugly" pointer casts, but we
  * really wanted to make sure, that no other code or build impact is generated
  * by the options in this file.
  */
 #define USE_OWN_FFT

 #if defined USE_FFTW || defined USE_FFTW_FOR_FFT
 #include <fftw3.h>

 #elif defined USE_KISSFFT
 #include "kissfft.hh"

 class KissfftConfig {
  public:
   KissfftConfig(uint16_t N) : inbuf(nullptr), outbuf(nullptr), fft(nullptr) {
     inbuf = new std::complex<double>[N];
     outbuf = new std::complex<double>[N];
     twiddle = new std::complex<double>[N];
     fft = new kissfft<double>(N, false);

     const double pi = std::acos(-1);
     for (uint16_t n = 0; n < N; n++) {
       twiddle[n] =
           std::complex<double>(std::cos(-pi * (8 * n + 1) / (8.0 * N * 2)),
                                std::sin(-pi * (8 * n + 1) / (8.0 * N * 2)));
     }
   }

   ~KissfftConfig() {
     if (nullptr != fft) {
       delete fft;
     }
     if (nullptr != inbuf) {
       delete[] inbuf;
     }
     if (nullptr != outbuf) {
       delete[] outbuf;
     }
     if (nullptr != twiddle) {
       delete[] twiddle;
     }
   }

   void transform() {
     if (nullptr != fft) {
       fft->transform(inbuf, outbuf);
     }
   }

   std::complex<double>* inbuf;
   std::complex<double>* outbuf;
   std::complex<double>* twiddle;
   kissfft<double>* fft;
 };

 #elif defined USE_OWN_FFT
 #include <complex>

 #include "fft.h"
 #endif

 DctIVDbl::DctIVDbl(uint16_t NF_)
     : NF(NF_), in(nullptr), out(nullptr), dctIVconfig(nullptr) {
   in = new double[NF];
   out = new double[NF];

 #if defined USE_FFTW
   dctIVconfig = fftw_plan_r2r_1d(NF, in, out, FFTW_REDFT11, FFTW_ESTIMATE);

 #elif defined USE_FFTW_FOR_FFT
   dctIVconfig = fftw_plan_dft_1d(NF / 2, reinterpret_cast<fftw_complex*>(in),
                                  reinterpret_cast<fftw_complex*>(out),
                                  FFTW_FORWARD, FFTW_ESTIMATE);

 #elif defined USE_KISSFFT
   dctIVconfig = new KissfftConfig(NF / 2);

 #elif defined USE_OWN_FFT

   int N = NF / 2;
   std::complex<double>* twiddle = new std::complex<double>[N];
   dctIVconfig = twiddle;
   const double pi = std::acos(-1);
   for (uint16_t n = 0; n < N; n++) {
     twiddle[n] =
         std::complex<double>(std::cos(-pi * (8 * n + 1) / (8.0 * N * 2)),
                              std::sin(-pi * (8 * n + 1) / (8.0 * N * 2)));
   }

 #endif

   for (uint16_t n = 0; n < NF; n++) {
     in[n] = 0;
     out[n] = 0;
   }
 }

 DctIVDbl::~DctIVDbl() {
 #if defined USE_FFTW || defined USE_FFTW_FOR_FFT
   if (nullptr != dctIVconfig) {
     fftw_destroy_plan(reinterpret_cast<fftw_plan>(dctIVconfig));
   }

 #elif defined USE_KISSFFT
   if (nullptr != dctIVconfig) {
     KissfftConfig* kissfftConfig =
         reinterpret_cast<KissfftConfig*>(dctIVconfig);
     delete kissfftConfig;
   }

 #elif defined USE_OWN_FFT
   std::complex<double>* twiddle = (std::complex<double>*)dctIVconfig;
   delete[] twiddle;

 #endif
   if (nullptr != in) {
     delete[] in;
   }
   if (nullptr != out) {
     delete[] out;
   }
 }

 void DctIVDirectDbl(uint16_t N, const double* const tw, double* const X) {
   const double pi = std::acos(-1);
   for (uint16_t k = 0; k < N; k++) {
     X[k] = 0;
     for (uint16_t n = 0; n < N; n++) {
       X[k] += tw[n] * std::cos(pi / N * (n + 0.5) * (k + 0.5));
     }
     X[k] *= 2;
   }
 }

 void DctIVDbl::run() {
 #ifdef USE_FFTW
   fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));

 #elif defined USE_FFTW_FOR_FFT
   const double pi = std::acos(-1);
   // assume NF being 4 times an integer
   for (uint16_t n = 1; n < NF / 2; n += 2) {
     double buffer;
     buffer = in[n];
     in[n] = in[NF - n];
     in[NF - n] = buffer;
   }
   for (uint16_t n = 0; n < NF; n += 2) {
     double real = in[n + 0];
     double imag = in[n + 1];
     in[n + 0] = real * std::cos(-pi * (4 * n + 1) / (8.0 * NF)) -
                 imag * std::sin(-pi * (4 * n + 1) / (8.0 * NF));
     in[n + 1] = real * std::sin(-pi * (4 * n + 1) / (8.0 * NF)) +
                 imag * std::cos(-pi * (4 * n + 1) / (8.0 * NF));
   }

   fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));

   for (uint16_t n = 0; n < NF; n += 2) {
     double real = out[n + 0];
     double imag = out[n + 1];
     out[n + 0] = 2 * (real * std::cos(-pi * (4 * n + 1) / (8.0 * NF)) -
                       imag * std::sin(-pi * (4 * n + 1) / (8.0 * NF)));
     out[n + 1] = 2 * (real * std::sin(-pi * (4 * n + 1) / (8.0 * NF)) +
                       imag * std::cos(-pi * (4 * n + 1) / (8.0 * NF)));
   }
   for (uint16_t n = 1; n < NF / 2; n += 2) {
     double buffer;
     buffer = out[n];
     out[n] = -out[NF - n];
     out[NF - n] = -buffer;
   }

 #elif defined USE_KISSFFT
   // assume NF being 4 times an integer
   KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
   for (uint16_t n = 0; n < NF / 2; n++) {
     kissfftConfig->inbuf[n] =
         kissfftConfig->twiddle[n] *
         std::complex<double>(in[2 * n], in[NF - 2 * n - 1]);
   }

   kissfftConfig->transform();

   for (uint16_t n = 0; n < NF / 2; n++) {
     std::complex<double> complexOut =
         kissfftConfig->twiddle[n] * kissfftConfig->outbuf[n];
     out[2 * n] = complexOut.real() * 2;
     out[NF - 2 * n - 1] = -complexOut.imag() * 2;
   }

 #elif defined USE_OWN_FFT

   fft_complex inbuf[NF / 2];
   fft_complex outbuf[NF / 2];

   // assume NF being 4 times an integer
   for (uint16_t n = 1; n < NF / 2; n += 2) {
     double buffer;
     buffer = in[n];
     in[n] = in[NF - n];
     in[NF - n] = buffer;
   }

   std::complex<double>* twiddle = (std::complex<double>*)dctIVconfig;
   for (uint16_t n = 0; n < NF / 2; n++) {
     double real = in[2 * n + 0];
     double imag = in[2 * n + 1];
     in[2 * n + 0] = real * twiddle[n].real() - imag * twiddle[n].imag();
     in[2 * n + 1] = real * twiddle[n].imag() + imag * twiddle[n].real();
   }

   for (uint16_t n = 0; n < NF / 2; n++) {
     inbuf[n].re = in[2 * n];
     inbuf[n].im = in[2 * n + 1];
   }

   fft_complex* actal_output = fft(false, inbuf, NF / 2, inbuf, outbuf);

   for (uint16_t n = 0; n < NF / 2; n++) {
     double real = actal_output[n].re;
     double imag = actal_output[n].im;
     out[2 * n + 0] = 2 * (real * twiddle[n].real() - imag * twiddle[n].imag());
     out[2 * n + 1] = 2 * (real * twiddle[n].imag() + imag * twiddle[n].real());
   }

   for (uint16_t n = 1; n < NF / 2; n += 2) {
     double buffer;
     buffer = out[n];
     out[n] = -out[NF - n];
     out[NF - n] = -buffer;
   }

 #else
   DctIVDirectDbl(NF, in, out);
 #endif
 }
	/*
	* DctIV.cpp
	*
	* Copyright 2021 HIMSA II K/S - www.himsa.com. Represented by EHIMA -
	* www.ehima.com
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "DctIV.hpp"

	#include <cmath>

	/*
	* Notes on the choice of the applied FFT library:
	* - Different fast transform implementations for the DCT-IV can be selected
	* via USE_FFTW USE_FFTW_FOR_FFT // assume NF being 4 times an integer
	* USE_KISSFFT // assume NF being 4 times an integer
	* where only one of them is meaningful to be defined
	*
	* - Defining none of the fast transforms will lead to a direct implementation
	* which is good for testing but usually to slow for practial applications
	*
	* - USE_FFTW: The free "fftw" library should be fastest, but may be more
	* complicated to be build for embedded target devices. It provides dedicated
	* optimization for the DCT-IV and thus is preferred from a technical
	* perspective. In terms of licences "fftw" is free in the sense of GPL, which
	* is not liked in some products. There might be the option to ask the authors
	* for other licences (which they propose). Nevertheless, the "license" issue
	* has driven us to not bundle "fftw" directly with the LC3 implementation,
	* although the API for using it is provided and tested.
	*
	* - USE_FFTW_FOR_FFT: this is a not fully optimized alternative approach using
	* the complex fft provided by "fftw" instead of the dedicated optimized DCT-IV
	* implementation. This option is mainly provided to demonstrate the transition
	* from USE_FFTW to USE_KISSFFT
	*
	* - USE_KISSFFT: the free "kissfft" library is sligthly slower than "fftw",
	* but gives a majority of the possible performance gain over a "stupid" direct
	* DCT-IV implementation. "kissfft" does not provide a dedicated optimization
	* for DCT-IV so that some additional code for implementing a DCT-IV by a
	* complex fft has to be added. This also implies additional memory needed. The
	* big advantage of the "kissfft" is that it is less restrictive in terms of its
	* license. Further, the template based C++ implementation reduces its impact on
	* the build system to a minimum (we just need to include the right header
	* without needing to compile and link a separate file).
	*
	* - USE_KISSFFT is defined below to be the default choice. The other
	* alternatives may be selecting by providing its define via the compiler switch
	* -D<define>
	*
	* - The impact of the defines is restricted to this *.cpp file for clarity
	* reasons. Ok, to make this work, we needed a few "ugly" pointer casts, but we
	* really wanted to make sure, that no other code or build impact is generated
	* by the options in this file.
	*/
	#define USE_OWN_FFT

	#if defined USE_FFTW \|\| defined USE_FFTW_FOR_FFT
	#include <fftw3.h>

	#elif defined USE_KISSFFT
	#include "kissfft.hh"

	class KissfftConfig {
	public:
	KissfftConfig(uint16_t N) : inbuf(nullptr), outbuf(nullptr), fft(nullptr) {
	inbuf = new std::complex<double>[N];
	outbuf = new std::complex<double>[N];
	twiddle = new std::complex<double>[N];
	fft = new kissfft<double>(N, false);

	const double pi = std::acos(-1);
	for (uint16_t n = 0; n < N; n++) {
	twiddle[n] =
	std::complex<double>(std::cos(-pi * (8 * n + 1) / (8.0 * N * 2)),
	std::sin(-pi * (8 * n + 1) / (8.0 * N * 2)));
	}
	}

	~KissfftConfig() {
	if (nullptr != fft) {
	delete fft;
	}
	if (nullptr != inbuf) {
	delete[] inbuf;
	}
	if (nullptr != outbuf) {
	delete[] outbuf;
	}
	if (nullptr != twiddle) {
	delete[] twiddle;
	}
	}

	void transform() {
	if (nullptr != fft) {
	fft->transform(inbuf, outbuf);
	}
	}

	std::complex<double>* inbuf;
	std::complex<double>* outbuf;
	std::complex<double>* twiddle;
	kissfft<double>* fft;
	};

	#elif defined USE_OWN_FFT
	#include <complex>

	#include "fft.h"
	#endif

	DctIVDbl::DctIVDbl(uint16_t NF_)
	: NF(NF_), in(nullptr), out(nullptr), dctIVconfig(nullptr) {
	in = new double[NF];
	out = new double[NF];

	#if defined USE_FFTW
	dctIVconfig = fftw_plan_r2r_1d(NF, in, out, FFTW_REDFT11, FFTW_ESTIMATE);

	#elif defined USE_FFTW_FOR_FFT
	dctIVconfig = fftw_plan_dft_1d(NF / 2, reinterpret_cast<fftw_complex*>(in),
	reinterpret_cast<fftw_complex*>(out),
	FFTW_FORWARD, FFTW_ESTIMATE);

	#elif defined USE_KISSFFT
	dctIVconfig = new KissfftConfig(NF / 2);

	#elif defined USE_OWN_FFT

	int N = NF / 2;
	std::complex<double>* twiddle = new std::complex<double>[N];
	dctIVconfig = twiddle;
	const double pi = std::acos(-1);
	for (uint16_t n = 0; n < N; n++) {
	twiddle[n] =
	std::complex<double>(std::cos(-pi * (8 * n + 1) / (8.0 * N * 2)),
	std::sin(-pi * (8 * n + 1) / (8.0 * N * 2)));
	}

	#endif

	for (uint16_t n = 0; n < NF; n++) {
	in[n] = 0;
	out[n] = 0;
	}
	}

	DctIVDbl::~DctIVDbl() {
	#if defined USE_FFTW \|\| defined USE_FFTW_FOR_FFT
	if (nullptr != dctIVconfig) {
	fftw_destroy_plan(reinterpret_cast<fftw_plan>(dctIVconfig));
	}

	#elif defined USE_KISSFFT
	if (nullptr != dctIVconfig) {
	KissfftConfig* kissfftConfig =
	reinterpret_cast<KissfftConfig*>(dctIVconfig);
	delete kissfftConfig;
	}

	#elif defined USE_OWN_FFT
	std::complex<double>* twiddle = (std::complex<double>*)dctIVconfig;
	delete[] twiddle;

	#endif
	if (nullptr != in) {
	delete[] in;
	}
	if (nullptr != out) {
	delete[] out;
	}
	}

	void DctIVDirectDbl(uint16_t N, const double* const tw, double* const X) {
	const double pi = std::acos(-1);
	for (uint16_t k = 0; k < N; k++) {
	X[k] = 0;
	for (uint16_t n = 0; n < N; n++) {
	X[k] += tw[n] * std::cos(pi / N * (n + 0.5) * (k + 0.5));
	}
	X[k] *= 2;
	}
	}

	void DctIVDbl::run() {
	#ifdef USE_FFTW
	fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));

	#elif defined USE_FFTW_FOR_FFT
	const double pi = std::acos(-1);
	// assume NF being 4 times an integer
	for (uint16_t n = 1; n < NF / 2; n += 2) {
	double buffer;
	buffer = in[n];
	in[n] = in[NF - n];
	in[NF - n] = buffer;
	}
	for (uint16_t n = 0; n < NF; n += 2) {
	double real = in[n + 0];
	double imag = in[n + 1];
	in[n + 0] = real * std::cos(-pi * (4 * n + 1) / (8.0 * NF)) -
	imag * std::sin(-pi * (4 * n + 1) / (8.0 * NF));
	in[n + 1] = real * std::sin(-pi * (4 * n + 1) / (8.0 * NF)) +
	imag * std::cos(-pi * (4 * n + 1) / (8.0 * NF));
	}

	fftw_execute(reinterpret_cast<fftw_plan>(dctIVconfig));

	for (uint16_t n = 0; n < NF; n += 2) {
	double real = out[n + 0];
	double imag = out[n + 1];
	out[n + 0] = 2 * (real * std::cos(-pi * (4 * n + 1) / (8.0 * NF)) -
	imag * std::sin(-pi * (4 * n + 1) / (8.0 * NF)));
	out[n + 1] = 2 * (real * std::sin(-pi * (4 * n + 1) / (8.0 * NF)) +
	imag * std::cos(-pi * (4 * n + 1) / (8.0 * NF)));
	}
	for (uint16_t n = 1; n < NF / 2; n += 2) {
	double buffer;
	buffer = out[n];
	out[n] = -out[NF - n];
	out[NF - n] = -buffer;
	}

	#elif defined USE_KISSFFT
	// assume NF being 4 times an integer
	KissfftConfig* kissfftConfig = reinterpret_cast<KissfftConfig*>(dctIVconfig);
	for (uint16_t n = 0; n < NF / 2; n++) {
	kissfftConfig->inbuf[n] =
	kissfftConfig->twiddle[n] *
	std::complex<double>(in[2 * n], in[NF - 2 * n - 1]);
	}

	kissfftConfig->transform();

	for (uint16_t n = 0; n < NF / 2; n++) {
	std::complex<double> complexOut =
	kissfftConfig->twiddle[n] * kissfftConfig->outbuf[n];
	out[2 * n] = complexOut.real() * 2;
	out[NF - 2 * n - 1] = -complexOut.imag() * 2;
	}

	#elif defined USE_OWN_FFT

	fft_complex inbuf[NF / 2];
	fft_complex outbuf[NF / 2];

	// assume NF being 4 times an integer
	for (uint16_t n = 1; n < NF / 2; n += 2) {
	double buffer;
	buffer = in[n];
	in[n] = in[NF - n];
	in[NF - n] = buffer;
	}

	std::complex<double>* twiddle = (std::complex<double>*)dctIVconfig;
	for (uint16_t n = 0; n < NF / 2; n++) {
	double real = in[2 * n + 0];
	double imag = in[2 * n + 1];
	in[2 * n + 0] = real * twiddle[n].real() - imag * twiddle[n].imag();
	in[2 * n + 1] = real * twiddle[n].imag() + imag * twiddle[n].real();
	}

	for (uint16_t n = 0; n < NF / 2; n++) {
	inbuf[n].re = in[2 * n];
	inbuf[n].im = in[2 * n + 1];
	}

	fft_complex* actal_output = fft(false, inbuf, NF / 2, inbuf, outbuf);

	for (uint16_t n = 0; n < NF / 2; n++) {
	double real = actal_output[n].re;
	double imag = actal_output[n].im;
	out[2 * n + 0] = 2 * (real * twiddle[n].real() - imag * twiddle[n].imag());
	out[2 * n + 1] = 2 * (real * twiddle[n].imag() + imag * twiddle[n].real());
	}

	for (uint16_t n = 1; n < NF / 2; n += 2) {
	double buffer;
	buffer = out[n];
	out[n] = -out[NF - n];
	out[NF - n] = -buffer;
	}

	#else
	DctIVDirectDbl(NF, in, out);
	#endif
	}