webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc - platform/external/webrtc - Git at Google

 /*
  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #define _USE_MATH_DEFINES

 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

 #include <algorithm>
 #include <cmath>
 #include <numeric>
 #include <vector>

 #include "webrtc/base/arraysize.h"
 #include "webrtc/common_audio/window_generator.h"
 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

 namespace webrtc {
 namespace {

 // Alpha for the Kaiser Bessel Derived window.
 const float kKbdAlpha = 1.5f;

 // The minimum value a post-processing mask can take.
 const float kMaskMinimum = 0.01f;

 const float kSpeedOfSoundMeterSeconds = 343;

 // For both target and interference angles, PI / 2 is perpendicular to the
 // microphone array, facing forwards. The positive direction goes
 // counterclockwise.
 // The angle at which we amplify sound.
 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;

 // The angle at which we suppress sound. Suppression is symmetric around PI / 2
 // radians, so sound is suppressed at both +|kInterfAngleRadians| and
 // PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
 // suppress sound coming from close angles as well.
 const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;

 // When calculating the interference covariance matrix, this is the weight for
 // the weighted average between the uniform covariance matrix and the angled
 // covariance matrix.
 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
 const float kBalance = 0.4f;

 // TODO(claguna): need comment here.
 const float kBeamwidthConstant = 0.00002f;

 // Alpha coefficients for mask smoothing.
 const float kMaskTimeSmoothAlpha = 0.2f;
 const float kMaskFrequencySmoothAlpha = 0.6f;

 // The average mask is computed from masks in this mid-frequency range. If these
 // ranges are changed |kMaskQuantile| might need to be adjusted.
 const int kLowMeanStartHz = 200;
 const int kLowMeanEndHz = 400;

 const int kHighMeanStartHz = 3000;
 const int kHighMeanEndHz = 5000;

 // Quantile of mask values which is used to estimate target presence.
 const float kMaskQuantile = 0.7f;
 // Mask threshold over which the data is considered signal and not interference.
 const float kMaskTargetThreshold = 0.3f;
 // Time in seconds after which the data is considered interference if the mask
 // does not pass |kMaskTargetThreshold|.
 const float kHoldTargetSeconds = 0.25f;

 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
 // used; to accomplish this, we compute both multiplications in the same loop.
 // The returned norm is clamped to be non-negative.
 float Norm(const ComplexMatrix<float>& mat,
            const ComplexMatrix<float>& norm_mat) {
   CHECK_EQ(norm_mat.num_rows(), 1);
   CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
   CHECK_EQ(norm_mat.num_columns(), mat.num_columns());

   complex<float> first_product = complex<float>(0.f, 0.f);
   complex<float> second_product = complex<float>(0.f, 0.f);

   const complex<float>* const* mat_els = mat.elements();
   const complex<float>* const* norm_mat_els = norm_mat.elements();

   for (int i = 0; i < norm_mat.num_columns(); ++i) {
     for (int j = 0; j < norm_mat.num_columns(); ++j) {
       first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
     }
     second_product += first_product * norm_mat_els[0][i];
     first_product = 0.f;
   }
   return std::max(second_product.real(), 0.f);
 }

 // Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
 complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
                                    const ComplexMatrix<float>& rhs) {
   CHECK_EQ(lhs.num_rows(), 1);
   CHECK_EQ(rhs.num_rows(), 1);
   CHECK_EQ(lhs.num_columns(), rhs.num_columns());

   const complex<float>* const* lhs_elements = lhs.elements();
   const complex<float>* const* rhs_elements = rhs.elements();

   complex<float> result = complex<float>(0.f, 0.f);
   for (int i = 0; i < lhs.num_columns(); ++i) {
     result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
   }

   return result;
 }

 // Works for positive numbers only.
 int Round(float x) {
   return std::floor(x + 0.5f);
 }

 // Calculates the sum of absolute values of a complex matrix.
 float SumAbs(const ComplexMatrix<float>& mat) {
   float sum_abs = 0.f;
   const complex<float>* const* mat_els = mat.elements();
   for (int i = 0; i < mat.num_rows(); ++i) {
     for (int j = 0; j < mat.num_columns(); ++j) {
       sum_abs += std::abs(mat_els[i][j]);
     }
   }
   return sum_abs;
 }

 // Calculates the sum of squares of a complex matrix.
 float SumSquares(const ComplexMatrix<float>& mat) {
   float sum_squares = 0.f;
   const complex<float>* const* mat_els = mat.elements();
   for (int i = 0; i < mat.num_rows(); ++i) {
     for (int j = 0; j < mat.num_columns(); ++j) {
       float abs_value = std::abs(mat_els[i][j]);
       sum_squares += abs_value * abs_value;
     }
   }
   return sum_squares;
 }

 // Does |out| = |in|.' * conj(|in|) for row vector |in|.
 void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
                                  ComplexMatrix<float>* out) {
   CHECK_EQ(in.num_rows(), 1);
   CHECK_EQ(out->num_rows(), in.num_columns());
   CHECK_EQ(out->num_columns(), in.num_columns());
   const complex<float>* in_elements = in.elements()[0];
   complex<float>* const* out_elements = out->elements();
   for (int i = 0; i < out->num_rows(); ++i) {
     for (int j = 0; j < out->num_columns(); ++j) {
       out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
     }
   }
 }

 std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
   for (int dim = 0; dim < 3; ++dim) {
     float center = 0.f;
     for (size_t i = 0; i < array_geometry.size(); ++i) {
       center += array_geometry[i].c[dim];
     }
     center /= array_geometry.size();
     for (size_t i = 0; i < array_geometry.size(); ++i) {
       array_geometry[i].c[dim] -= center;
     }
   }
   return array_geometry;
 }

 }  // namespace

 NonlinearBeamformer::NonlinearBeamformer(
     const std::vector<Point>& array_geometry)
   : num_input_channels_(array_geometry.size()),
       array_geometry_(GetCenteredArray(array_geometry)) {
   WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
 }

 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
   chunk_length_ = sample_rate_hz / (1000.f / chunk_size_ms);
   sample_rate_hz_ = sample_rate_hz;
   low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
   low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
   high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
   high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
   // These bin indexes determine the regions over which a mean is taken. This
   // is applied as a constant value over the adjacent end "frequency correction"
   // regions.
   //
   //             low_mean_start_bin_     high_mean_start_bin_
   //                   v                         v              constant
   // |----------------|--------|----------------|-------|----------------|
   //   constant               ^                        ^
   //             low_mean_end_bin_       high_mean_end_bin_
   //
   DCHECK_GT(low_mean_start_bin_, 0);
   DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
   DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
   DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
   DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);

   high_pass_postfilter_mask_ = 1.f;
   is_target_present_ = false;
   hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
   interference_blocks_count_ = hold_target_blocks_;


   lapped_transform_.reset(new LappedTransform(num_input_channels_,
                                               1,
                                               chunk_length_,
                                               window_,
                                               kFftSize,
                                               kFftSize / 2,
                                               this));
   for (int i = 0; i < kNumFreqBins; ++i) {
     time_smooth_mask_[i] = 1.f;
     final_mask_[i] = 1.f;
     float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
     wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
     mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
                           kBeamwidthConstant * wave_numbers_[i] *
                           wave_numbers_[i];
   }

   // Initialize all nonadaptive values before looping through the frames.
   InitDelaySumMasks();
   InitTargetCovMats();
   InitInterfCovMats();

   for (int i = 0; i < kNumFreqBins; ++i) {
     rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
     rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
     reflected_rpsiws_[i] =
         Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
   }
 }

 void NonlinearBeamformer::InitDelaySumMasks() {
   for (int f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
     delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
     CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
                                                    kFftSize,
                                                    sample_rate_hz_,
                                                    kSpeedOfSoundMeterSeconds,
                                                    array_geometry_,
                                                    kTargetAngleRadians,
                                                    &delay_sum_masks_[f_ix]);

     complex_f norm_factor = sqrt(
         ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
     delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
     normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
     normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
         normalized_delay_sum_masks_[f_ix]));
   }
 }

 void NonlinearBeamformer::InitTargetCovMats() {
   for (int i = 0; i < kNumFreqBins; ++i) {
     target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
     TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
     complex_f normalization_factor = target_cov_mats_[i].Trace();
     target_cov_mats_[i].Scale(1.f / normalization_factor);
   }
 }

 void NonlinearBeamformer::InitInterfCovMats() {
   for (int i = 0; i < kNumFreqBins; ++i) {
     interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
     ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
     ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

     CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
                                                        array_geometry_,
                                                        &uniform_cov_mat);

     CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
                                                       kInterfAngleRadians,
                                                       i,
                                                       kFftSize,
                                                       kNumFreqBins,
                                                       sample_rate_hz_,
                                                       array_geometry_,
                                                       &angled_cov_mat);
     // Normalize matrices before averaging them.
     complex_f normalization_factor = uniform_cov_mat.Trace();
     uniform_cov_mat.Scale(1.f / normalization_factor);
     normalization_factor = angled_cov_mat.Trace();
     angled_cov_mat.Scale(1.f / normalization_factor);

     // Average matrices.
     uniform_cov_mat.Scale(1 - kBalance);
     angled_cov_mat.Scale(kBalance);
     interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
     reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
   }
 }

 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
                                        ChannelBuffer<float>* output) {
   DCHECK_EQ(input.num_channels(), num_input_channels_);
   DCHECK_EQ(input.num_frames_per_band(), chunk_length_);

   float old_high_pass_mask = high_pass_postfilter_mask_;
   lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
   // Ramp up/down for smoothing. 1 mask per 10ms results in audible
   // discontinuities.
   const float ramp_increment =
       (high_pass_postfilter_mask_ - old_high_pass_mask) /
       input.num_frames_per_band();
   // Apply delay and sum and post-filter in the time domain. WARNING: only works
   // because delay-and-sum is not frequency dependent.
   for (int i = 1; i < input.num_bands(); ++i) {
     float smoothed_mask = old_high_pass_mask;
     for (int j = 0; j < input.num_frames_per_band(); ++j) {
       smoothed_mask += ramp_increment;

       // Applying the delay and sum (at zero degrees, this is equivalent to
       // averaging).
       float sum = 0.f;
       for (int k = 0; k < input.num_channels(); ++k) {
         sum += input.channels(i)[k][j];
       }
       output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
     }
   }
 }

 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
                                             int num_input_channels,
                                             int num_freq_bins,
                                             int num_output_channels,
                                             complex_f* const* output) {
   CHECK_EQ(num_freq_bins, kNumFreqBins);
   CHECK_EQ(num_input_channels, num_input_channels_);
   CHECK_EQ(num_output_channels, 1);

   // Calculating the post-filter masks. Note that we need two for each
   // frequency bin to account for the positive and negative interferer
   // angle.
   for (int i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
     eig_m_.CopyFromColumn(input, i, num_input_channels_);
     float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
     if (eig_m_norm_factor != 0.f) {
       eig_m_.Scale(1.f / eig_m_norm_factor);
     }

     float rxim = Norm(target_cov_mats_[i], eig_m_);
     float ratio_rxiw_rxim = 0.f;
     if (rxim > 0.f) {
       ratio_rxiw_rxim = rxiws_[i] / rxim;
     }

     complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
     rmw *= rmw;
     float rmw_r = rmw.real();

     new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
                                            rpsiws_[i],
                                            ratio_rxiw_rxim,
                                            rmw_r,
                                            mask_thresholds_[i]);

     new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
                                             reflected_rpsiws_[i],
                                             ratio_rxiw_rxim,
                                             rmw_r,
                                             mask_thresholds_[i]);
   }

   ApplyMaskTimeSmoothing();
   EstimateTargetPresence();
   ApplyLowFrequencyCorrection();
   ApplyHighFrequencyCorrection();
   ApplyMaskFrequencySmoothing();
   ApplyMasks(input, output);
 }

 float NonlinearBeamformer::CalculatePostfilterMask(
     const ComplexMatrixF& interf_cov_mat,
     float rpsiw,
     float ratio_rxiw_rxim,
     float rmw_r,
     float mask_threshold) {
   float rpsim = Norm(interf_cov_mat, eig_m_);

   // Find lambda.
   float ratio = 0.f;
   if (rpsim > 0.f) {
     ratio = rpsiw / rpsim;
   }
   float numerator = rmw_r - ratio;
   float denominator = ratio_rxiw_rxim - ratio;

   float mask = 1.f;
   if (denominator > mask_threshold) {
     float lambda = numerator / denominator;
     mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
   }
   return mask;
 }

 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
                                      complex_f* const* output) {
   complex_f* output_channel = output[0];
   for (int f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
     output_channel[f_ix] = complex_f(0.f, 0.f);

     const complex_f* delay_sum_mask_els =
         normalized_delay_sum_masks_[f_ix].elements()[0];
     for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
       output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
     }

     output_channel[f_ix] *= final_mask_[f_ix];
   }
 }

 // Smooth new_mask_ into time_smooth_mask_.
 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
   for (int i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
     time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
                            (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
   }
 }

 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
   // Smooth over frequency in both directions. The "frequency correction"
   // regions have constant value, but we enter them to smooth over the jump
   // that exists at the boundary. However, this does mean when smoothing "away"
   // from the region that we only need to use the last element.
   //
   // Upward smoothing:
   //   low_mean_start_bin_
   //         v
   // |------|------------|------|
   //       ^------------------>^
   //
   // Downward smoothing:
   //         high_mean_end_bin_
   //                    v
   // |------|------------|------|
   //  ^<------------------^
   std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
   for (int i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
     final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
                      (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
   }
   for (int i = high_mean_end_bin_; i >= 0; --i) {
     final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
                      (1 - kMaskFrequencySmoothAlpha) * final_mask_[i + 1];
   }
 }

 // Apply low frequency correction to time_smooth_mask_.
 void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
   const float low_frequency_mask =
       MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
   std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
             low_frequency_mask);
 }

 // Apply high frequency correction to time_smooth_mask_. Update
 // high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
 void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
   high_pass_postfilter_mask_ =
       MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
   std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
             time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
 }

 // Compute mean over the given range of time_smooth_mask_, [first, last).
 float NonlinearBeamformer::MaskRangeMean(int first, int last) {
   DCHECK_GT(last, first);
   const float sum = std::accumulate(time_smooth_mask_ + first,
                                     time_smooth_mask_ + last, 0.f);
   return sum / (last - first);
 }

 void NonlinearBeamformer::EstimateTargetPresence() {
   const int quantile =
       (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
       low_mean_start_bin_;
   std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
                    new_mask_ + high_mean_end_bin_ + 1);
   if (new_mask_[quantile] > kMaskTargetThreshold) {
     is_target_present_ = true;
     interference_blocks_count_ = 0;
   } else {
     is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
   }
 }

 }  // namespace webrtc
	/*
	* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#define _USE_MATH_DEFINES

	#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

	#include <algorithm>
	#include <cmath>
	#include <numeric>
	#include <vector>

	#include "webrtc/base/arraysize.h"
	#include "webrtc/common_audio/window_generator.h"
	#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

	namespace webrtc {
	namespace {

	// Alpha for the Kaiser Bessel Derived window.
	const float kKbdAlpha = 1.5f;

	// The minimum value a post-processing mask can take.
	const float kMaskMinimum = 0.01f;

	const float kSpeedOfSoundMeterSeconds = 343;

	// For both target and interference angles, PI / 2 is perpendicular to the
	// microphone array, facing forwards. The positive direction goes
	// counterclockwise.
	// The angle at which we amplify sound.
	const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;

	// The angle at which we suppress sound. Suppression is symmetric around PI / 2
	// radians, so sound is suppressed at both +\|kInterfAngleRadians\| and
	// PI - \|kInterfAngleRadians\|. Since the beamformer is robust, this should
	// suppress sound coming from close angles as well.
	const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;

	// When calculating the interference covariance matrix, this is the weight for
	// the weighted average between the uniform covariance matrix and the angled
	// covariance matrix.
	// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
	const float kBalance = 0.4f;

	// TODO(claguna): need comment here.
	const float kBeamwidthConstant = 0.00002f;

	// Alpha coefficients for mask smoothing.
	const float kMaskTimeSmoothAlpha = 0.2f;
	const float kMaskFrequencySmoothAlpha = 0.6f;

	// The average mask is computed from masks in this mid-frequency range. If these
	// ranges are changed \|kMaskQuantile\| might need to be adjusted.
	const int kLowMeanStartHz = 200;
	const int kLowMeanEndHz = 400;

	const int kHighMeanStartHz = 3000;
	const int kHighMeanEndHz = 5000;

	// Quantile of mask values which is used to estimate target presence.
	const float kMaskQuantile = 0.7f;
	// Mask threshold over which the data is considered signal and not interference.
	const float kMaskTargetThreshold = 0.3f;
	// Time in seconds after which the data is considered interference if the mask
	// does not pass \|kMaskTargetThreshold\|.
	const float kHoldTargetSeconds = 0.25f;

	// Does conjugate(\|norm_mat\|) * \|mat\| * transpose(\|norm_mat\|). No extra space is
	// used; to accomplish this, we compute both multiplications in the same loop.
	// The returned norm is clamped to be non-negative.
	float Norm(const ComplexMatrix<float>& mat,
	const ComplexMatrix<float>& norm_mat) {
	CHECK_EQ(norm_mat.num_rows(), 1);
	CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
	CHECK_EQ(norm_mat.num_columns(), mat.num_columns());

	complex<float> first_product = complex<float>(0.f, 0.f);
	complex<float> second_product = complex<float>(0.f, 0.f);

	const complex<float>* const* mat_els = mat.elements();
	const complex<float>* const* norm_mat_els = norm_mat.elements();

	for (int i = 0; i < norm_mat.num_columns(); ++i) {
	for (int j = 0; j < norm_mat.num_columns(); ++j) {
	first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
	}
	second_product += first_product * norm_mat_els[0][i];
	first_product = 0.f;
	}
	return std::max(second_product.real(), 0.f);
	}

	// Does conjugate(\|lhs\|) * \|rhs\| for row vectors \|lhs\| and \|rhs\|.
	complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
	const ComplexMatrix<float>& rhs) {
	CHECK_EQ(lhs.num_rows(), 1);
	CHECK_EQ(rhs.num_rows(), 1);
	CHECK_EQ(lhs.num_columns(), rhs.num_columns());

	const complex<float>* const* lhs_elements = lhs.elements();
	const complex<float>* const* rhs_elements = rhs.elements();

	complex<float> result = complex<float>(0.f, 0.f);
	for (int i = 0; i < lhs.num_columns(); ++i) {
	result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
	}

	return result;
	}

	// Works for positive numbers only.
	int Round(float x) {
	return std::floor(x + 0.5f);
	}

	// Calculates the sum of absolute values of a complex matrix.
	float SumAbs(const ComplexMatrix<float>& mat) {
	float sum_abs = 0.f;
	const complex<float>* const* mat_els = mat.elements();
	for (int i = 0; i < mat.num_rows(); ++i) {
	for (int j = 0; j < mat.num_columns(); ++j) {
	sum_abs += std::abs(mat_els[i][j]);
	}
	}
	return sum_abs;
	}

	// Calculates the sum of squares of a complex matrix.
	float SumSquares(const ComplexMatrix<float>& mat) {
	float sum_squares = 0.f;
	const complex<float>* const* mat_els = mat.elements();
	for (int i = 0; i < mat.num_rows(); ++i) {
	for (int j = 0; j < mat.num_columns(); ++j) {
	float abs_value = std::abs(mat_els[i][j]);
	sum_squares += abs_value * abs_value;
	}
	}
	return sum_squares;
	}

	// Does \|out\| = \|in\|.' * conj(\|in\|) for row vector \|in\|.
	void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
	ComplexMatrix<float>* out) {
	CHECK_EQ(in.num_rows(), 1);
	CHECK_EQ(out->num_rows(), in.num_columns());
	CHECK_EQ(out->num_columns(), in.num_columns());
	const complex<float>* in_elements = in.elements()[0];
	complex<float>* const* out_elements = out->elements();
	for (int i = 0; i < out->num_rows(); ++i) {
	for (int j = 0; j < out->num_columns(); ++j) {
	out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
	}
	}
	}

	std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
	for (int dim = 0; dim < 3; ++dim) {
	float center = 0.f;
	for (size_t i = 0; i < array_geometry.size(); ++i) {
	center += array_geometry[i].c[dim];
	}
	center /= array_geometry.size();
	for (size_t i = 0; i < array_geometry.size(); ++i) {
	array_geometry[i].c[dim] -= center;
	}
	}
	return array_geometry;
	}

	} // namespace

	NonlinearBeamformer::NonlinearBeamformer(
	const std::vector<Point>& array_geometry)
	: num_input_channels_(array_geometry.size()),
	array_geometry_(GetCenteredArray(array_geometry)) {
	WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
	}

	void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
	chunk_length_ = sample_rate_hz / (1000.f / chunk_size_ms);
	sample_rate_hz_ = sample_rate_hz;
	low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
	low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
	high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
	high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
	// These bin indexes determine the regions over which a mean is taken. This
	// is applied as a constant value over the adjacent end "frequency correction"
	// regions.
	//
	// low_mean_start_bin_ high_mean_start_bin_
	// v v constant
	// \|----------------\|--------\|----------------\|-------\|----------------\|
	// constant ^ ^
	// low_mean_end_bin_ high_mean_end_bin_
	//
	DCHECK_GT(low_mean_start_bin_, 0);
	DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
	DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
	DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
	DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);

	high_pass_postfilter_mask_ = 1.f;
	is_target_present_ = false;
	hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
	interference_blocks_count_ = hold_target_blocks_;


	lapped_transform_.reset(new LappedTransform(num_input_channels_,
	1,
	chunk_length_,
	window_,
	kFftSize,
	kFftSize / 2,
	this));
	for (int i = 0; i < kNumFreqBins; ++i) {
	time_smooth_mask_[i] = 1.f;
	final_mask_[i] = 1.f;
	float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
	wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
	mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
	kBeamwidthConstant * wave_numbers_[i] *
	wave_numbers_[i];
	}

	// Initialize all nonadaptive values before looping through the frames.
	InitDelaySumMasks();
	InitTargetCovMats();
	InitInterfCovMats();

	for (int i = 0; i < kNumFreqBins; ++i) {
	rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
	rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
	reflected_rpsiws_[i] =
	Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
	}
	}

	void NonlinearBeamformer::InitDelaySumMasks() {
	for (int f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
	delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
	CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
	kFftSize,
	sample_rate_hz_,
	kSpeedOfSoundMeterSeconds,
	array_geometry_,
	kTargetAngleRadians,
	&delay_sum_masks_[f_ix]);

	complex_f norm_factor = sqrt(
	ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
	delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
	normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
	normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
	normalized_delay_sum_masks_[f_ix]));
	}
	}

	void NonlinearBeamformer::InitTargetCovMats() {
	for (int i = 0; i < kNumFreqBins; ++i) {
	target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
	TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
	complex_f normalization_factor = target_cov_mats_[i].Trace();
	target_cov_mats_[i].Scale(1.f / normalization_factor);
	}
	}

	void NonlinearBeamformer::InitInterfCovMats() {
	for (int i = 0; i < kNumFreqBins; ++i) {
	interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
	ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
	ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

	CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
	array_geometry_,
	&uniform_cov_mat);

	CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
	kInterfAngleRadians,
	i,
	kFftSize,
	kNumFreqBins,
	sample_rate_hz_,
	array_geometry_,
	&angled_cov_mat);
	// Normalize matrices before averaging them.
	complex_f normalization_factor = uniform_cov_mat.Trace();
	uniform_cov_mat.Scale(1.f / normalization_factor);
	normalization_factor = angled_cov_mat.Trace();
	angled_cov_mat.Scale(1.f / normalization_factor);

	// Average matrices.
	uniform_cov_mat.Scale(1 - kBalance);
	angled_cov_mat.Scale(kBalance);
	interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
	reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
	}
	}

	void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
	ChannelBuffer<float>* output) {
	DCHECK_EQ(input.num_channels(), num_input_channels_);
	DCHECK_EQ(input.num_frames_per_band(), chunk_length_);

	float old_high_pass_mask = high_pass_postfilter_mask_;
	lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
	// Ramp up/down for smoothing. 1 mask per 10ms results in audible
	// discontinuities.
	const float ramp_increment =
	(high_pass_postfilter_mask_ - old_high_pass_mask) /
	input.num_frames_per_band();
	// Apply delay and sum and post-filter in the time domain. WARNING: only works
	// because delay-and-sum is not frequency dependent.
	for (int i = 1; i < input.num_bands(); ++i) {
	float smoothed_mask = old_high_pass_mask;
	for (int j = 0; j < input.num_frames_per_band(); ++j) {
	smoothed_mask += ramp_increment;

	// Applying the delay and sum (at zero degrees, this is equivalent to
	// averaging).
	float sum = 0.f;
	for (int k = 0; k < input.num_channels(); ++k) {
	sum += input.channels(i)[k][j];
	}
	output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
	}
	}
	}

	void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
	int num_input_channels,
	int num_freq_bins,
	int num_output_channels,
	complex_f* const* output) {
	CHECK_EQ(num_freq_bins, kNumFreqBins);
	CHECK_EQ(num_input_channels, num_input_channels_);
	CHECK_EQ(num_output_channels, 1);

	// Calculating the post-filter masks. Note that we need two for each
	// frequency bin to account for the positive and negative interferer
	// angle.
	for (int i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
	eig_m_.CopyFromColumn(input, i, num_input_channels_);
	float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
	if (eig_m_norm_factor != 0.f) {
	eig_m_.Scale(1.f / eig_m_norm_factor);
	}

	float rxim = Norm(target_cov_mats_[i], eig_m_);
	float ratio_rxiw_rxim = 0.f;
	if (rxim > 0.f) {
	ratio_rxiw_rxim = rxiws_[i] / rxim;
	}

	complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
	rmw *= rmw;
	float rmw_r = rmw.real();

	new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
	rpsiws_[i],
	ratio_rxiw_rxim,
	rmw_r,
	mask_thresholds_[i]);

	new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
	reflected_rpsiws_[i],
	ratio_rxiw_rxim,
	rmw_r,
	mask_thresholds_[i]);
	}

	ApplyMaskTimeSmoothing();
	EstimateTargetPresence();
	ApplyLowFrequencyCorrection();
	ApplyHighFrequencyCorrection();
	ApplyMaskFrequencySmoothing();
	ApplyMasks(input, output);
	}

	float NonlinearBeamformer::CalculatePostfilterMask(
	const ComplexMatrixF& interf_cov_mat,
	float rpsiw,
	float ratio_rxiw_rxim,
	float rmw_r,
	float mask_threshold) {
	float rpsim = Norm(interf_cov_mat, eig_m_);

	// Find lambda.
	float ratio = 0.f;
	if (rpsim > 0.f) {
	ratio = rpsiw / rpsim;
	}
	float numerator = rmw_r - ratio;
	float denominator = ratio_rxiw_rxim - ratio;

	float mask = 1.f;
	if (denominator > mask_threshold) {
	float lambda = numerator / denominator;
	mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
	}
	return mask;
	}

	void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
	complex_f* const* output) {
	complex_f* output_channel = output[0];
	for (int f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
	output_channel[f_ix] = complex_f(0.f, 0.f);

	const complex_f* delay_sum_mask_els =
	normalized_delay_sum_masks_[f_ix].elements()[0];
	for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
	output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
	}

	output_channel[f_ix] *= final_mask_[f_ix];
	}
	}

	// Smooth new_mask_ into time_smooth_mask_.
	void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
	for (int i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
	time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
	(1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
	}
	}

	// Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
	void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
	// Smooth over frequency in both directions. The "frequency correction"
	// regions have constant value, but we enter them to smooth over the jump
	// that exists at the boundary. However, this does mean when smoothing "away"
	// from the region that we only need to use the last element.
	//
	// Upward smoothing:
	// low_mean_start_bin_
	// v
	// \|------\|------------\|------\|
	// ^------------------>^
	//
	// Downward smoothing:
	// high_mean_end_bin_
	// v
	// \|------\|------------\|------\|
	// ^<------------------^
	std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
	for (int i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
	final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
	(1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
	}
	for (int i = high_mean_end_bin_; i >= 0; --i) {
	final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
	(1 - kMaskFrequencySmoothAlpha) * final_mask_[i + 1];
	}
	}

	// Apply low frequency correction to time_smooth_mask_.
	void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
	const float low_frequency_mask =
	MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
	std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
	low_frequency_mask);
	}

	// Apply high frequency correction to time_smooth_mask_. Update
	// high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
	void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
	high_pass_postfilter_mask_ =
	MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
	std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
	time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
	}

	// Compute mean over the given range of time_smooth_mask_, [first, last).
	float NonlinearBeamformer::MaskRangeMean(int first, int last) {
	DCHECK_GT(last, first);
	const float sum = std::accumulate(time_smooth_mask_ + first,
	time_smooth_mask_ + last, 0.f);
	return sum / (last - first);
	}

	void NonlinearBeamformer::EstimateTargetPresence() {
	const int quantile =
	(high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
	low_mean_start_bin_;
	std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
	new_mask_ + high_mean_end_bin_ + 1);
	if (new_mask_[quantile] > kMaskTargetThreshold) {
	is_target_present_ = true;
	interference_blocks_count_ = 0;
	} else {
	is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
	}
	}

	} // namespace webrtc