Add 48kHz support to AGC
Doing the same for the 16-24kHz band as was done for the 8-16kHz band.
Results look and sound just as nice.
Originally reviewed here:
https://webrtc-codereview.appspot.com/26339004/
BUG=webrtc:3146
R=andrew@webrtc.org, kwiberg@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/28299004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@7917 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/agc/legacy/analog_agc.c b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c
index 7340525..9a207c3 100644
--- a/webrtc/modules/audio_processing/agc/legacy/analog_agc.c
+++ b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c
@@ -112,7 +112,7 @@
6726, 5343, 4244, 3371, 2678, 2127, 1690, 1342, 1066, 847, 673, 534, 424, 337, 268,
213, 169, 134, 107, 85, 67};
-int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
+int WebRtcAgc_AddMic(void *state, int16_t* const* in_mic, int16_t num_bands,
int16_t samples)
{
int32_t nrg, max_nrg, sample, tmp32;
@@ -134,17 +134,6 @@
}
}
- /* Check for valid pointers based on sampling rate */
- if ((stt->fs == 32000) && (in_mic_H == NULL))
- {
- return -1;
- }
- /* Check for valid pointer for low band */
- if (in_mic == NULL)
- {
- return -1;
- }
-
/* apply slowly varying digital gain */
if (stt->micVol > stt->maxAnalog)
{
@@ -175,32 +164,19 @@
for (i = 0; i < samples; i++)
{
- // For lower band
- sample = (in_mic[i] * gain) >> 12;
- if (sample > 32767)
+ int j;
+ for (j = 0; j < num_bands; ++j)
{
- in_mic[i] = 32767;
- } else if (sample < -32768)
- {
- in_mic[i] = -32768;
- } else
- {
- in_mic[i] = (int16_t)sample;
- }
-
- // For higher band
- if (stt->fs == 32000)
- {
- sample = (in_mic_H[i] * gain) >> 12;
+ sample = (in_mic[j][i] * gain) >> 12;
if (sample > 32767)
{
- in_mic_H[i] = 32767;
+ in_mic[j][i] = 32767;
} else if (sample < -32768)
{
- in_mic_H[i] = -32768;
+ in_mic[j][i] = -32768;
} else
{
- in_mic_H[i] = (int16_t)sample;
+ in_mic[j][i] = (int16_t)sample;
}
}
}
@@ -224,7 +200,8 @@
max_nrg = 0;
for (n = 0; n < L; n++)
{
- nrg = WEBRTC_SPL_MUL_16_16(in_mic[i * L + n], in_mic[i * L + n]);
+ nrg = WEBRTC_SPL_MUL_16_16(in_mic[0][i * L + n],
+ in_mic[0][i * L + n]);
if (nrg > max_nrg)
{
max_nrg = nrg;
@@ -246,10 +223,13 @@
{
if (stt->fs == 16000)
{
- WebRtcSpl_DownsampleBy2(&in_mic[i * 32], 32, tmp_speech, stt->filterState);
+ WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32],
+ 32,
+ tmp_speech,
+ stt->filterState);
} else
{
- memcpy(tmp_speech, &in_mic[i * 16], 16 * sizeof(short));
+ memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(short));
}
/* Compute energy in blocks of 16 samples */
ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4);
@@ -265,7 +245,7 @@
}
/* call VAD (use low band only) */
- WebRtcAgc_ProcessVad(&stt->vadMic, in_mic, samples);
+ WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples);
return 0;
}
@@ -286,7 +266,7 @@
{
return -1;
}
- } else if (stt->fs == 16000 || stt->fs == 32000)
+ } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000)
{
if (samples != 160)
{
@@ -300,13 +280,13 @@
return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples);
}
-int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
- int16_t samples, int32_t micLevelIn,
+int WebRtcAgc_VirtualMic(void *agcInst, int16_t* const* in_near,
+ int16_t num_bands, int16_t samples, int32_t micLevelIn,
int32_t *micLevelOut)
{
int32_t tmpFlt, micLevelTmp, gainIdx;
uint16_t gain;
- int16_t ii;
+ int16_t ii, j;
Agc_t *stt;
uint32_t nrg;
@@ -329,7 +309,7 @@
frameNrgLimit = frameNrgLimit << 1;
}
- frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0], in_near[0]);
+ frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0][0], in_near[0][0]);
for (sampleCntr = 1; sampleCntr < samples; sampleCntr++)
{
@@ -337,12 +317,14 @@
// the correct value of the energy is not important
if (frameNrg < frameNrgLimit)
{
- nrg = WEBRTC_SPL_MUL_16_16(in_near[sampleCntr], in_near[sampleCntr]);
+ nrg = WEBRTC_SPL_MUL_16_16(in_near[0][sampleCntr],
+ in_near[0][sampleCntr]);
frameNrg += nrg;
}
// Count the zero crossings
- numZeroCrossing += ((in_near[sampleCntr] ^ in_near[sampleCntr - 1]) < 0);
+ numZeroCrossing +=
+ ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0);
}
if ((frameNrg < 500) || (numZeroCrossing <= 5))
@@ -389,7 +371,7 @@
}
for (ii = 0; ii < samples; ii++)
{
- tmpFlt = (in_near[ii] * gain) >> 10;
+ tmpFlt = (in_near[0][ii] * gain) >> 10;
if (tmpFlt > 32767)
{
tmpFlt = 32767;
@@ -414,10 +396,10 @@
gain = kSuppressionTableVirtualMic[127 - gainIdx];
}
}
- in_near[ii] = (int16_t)tmpFlt;
- if (stt->fs == 32000)
+ in_near[0][ii] = (int16_t)tmpFlt;
+ for (j = 1; j < num_bands; ++j)
{
- tmpFlt = (in_near_H[ii] * gain) >> 10;
+ tmpFlt = (in_near[j][ii] * gain) >> 10;
if (tmpFlt > 32767)
{
tmpFlt = 32767;
@@ -426,7 +408,7 @@
{
tmpFlt = -32768;
}
- in_near_H[ii] = (int16_t)tmpFlt;
+ in_near[j][ii] = (int16_t)tmpFlt;
}
}
/* Set the level we (finally) used */
@@ -434,7 +416,7 @@
// *micLevelOut = stt->micGainIdx;
*micLevelOut = stt->micGainIdx >> stt->scale;
/* Add to Mic as if it was the output from a true microphone */
- if (WebRtcAgc_AddMic(agcInst, in_near, in_near_H, samples) != 0)
+ if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0)
{
return -1;
}
@@ -1158,9 +1140,9 @@
return 0;
}
-int WebRtcAgc_Process(void *agcInst, const int16_t *in_near,
- const int16_t *in_near_H, int16_t samples,
- int16_t *out, int16_t *out_H, int32_t inMicLevel,
+int WebRtcAgc_Process(void *agcInst, const int16_t* const* in_near,
+ int16_t num_bands, int16_t samples,
+ int16_t* const* out, int32_t inMicLevel,
int32_t *outMicLevel, int16_t echo,
uint8_t *saturationWarning)
{
@@ -1182,7 +1164,7 @@
{
return -1;
}
- } else if (stt->fs == 16000 || stt->fs == 32000)
+ } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000)
{
if (samples != 160)
{
@@ -1193,17 +1175,6 @@
return -1;
}
- /* Check for valid pointers based on sampling rate */
- if (stt->fs == 32000 && in_near_H == NULL)
- {
- return -1;
- }
- /* Check for valid pointers for low band */
- if (in_near == NULL)
- {
- return -1;
- }
-
*saturationWarning = 0;
//TODO: PUT IN RANGE CHECKING FOR INPUT LEVELS
*outMicLevel = inMicLevel;
@@ -1214,9 +1185,8 @@
if (WebRtcAgc_ProcessDigital(&stt->digitalAgc,
in_near,
- in_near_H,
+ num_bands,
out,
- out_H,
stt->fs,
stt->lowLevelSignal) == -1)
{
diff --git a/webrtc/modules/audio_processing/agc/legacy/digital_agc.c b/webrtc/modules/audio_processing/agc/legacy/digital_agc.c
index 366cd5d..81e012e 100644
--- a/webrtc/modules/audio_processing/agc/legacy/digital_agc.c
+++ b/webrtc/modules/audio_processing/agc/legacy/digital_agc.c
@@ -293,9 +293,11 @@
return 0;
}
-int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
- const int16_t *in_near_H, int16_t *out,
- int16_t *out_H, uint32_t FS,
+int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt,
+ const int16_t* const* in_near,
+ int16_t num_bands,
+ int16_t* const* out,
+ uint32_t FS,
int16_t lowlevelSignal)
{
// array for gains (one value per ms, incl start & end)
@@ -303,7 +305,7 @@
int32_t out_tmp, tmp32;
int32_t env[10];
- int32_t nrg, max_nrg;
+ int32_t max_nrg;
int32_t cur_level;
int32_t gain32, delta;
int16_t logratio;
@@ -311,7 +313,7 @@
int16_t zeros = 0, zeros_fast, frac = 0;
int16_t decay;
int16_t gate, gain_adj;
- int16_t k, n;
+ int16_t k, n, i;
int16_t L, L2; // samples/subframe
// determine number of samples per ms
@@ -319,7 +321,7 @@
{
L = 8;
L2 = 3;
- } else if (FS == 16000 || FS == 32000)
+ } else if (FS == 16000 || FS == 32000 || FS == 48000)
{
L = 16;
L2 = 4;
@@ -328,20 +330,16 @@
return -1;
}
- if (in_near != out)
+ for (i = 0; i < num_bands; ++i)
{
- // Only needed if they don't already point to the same place.
- memcpy(out, in_near, 10 * L * sizeof(int16_t));
- }
- if (FS == 32000)
- {
- if (in_near_H != out_H)
+ if (in_near[i] != out[i])
{
- memcpy(out_H, in_near_H, 10 * L * sizeof(int16_t));
+ // Only needed if they don't already point to the same place.
+ memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
}
}
// VAD for near end
- logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10);
+ logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10);
// Account for far end VAD
if (stt->vadFarend.counter > 10)
@@ -407,7 +405,8 @@
max_nrg = 0;
for (n = 0; n < L; n++)
{
- nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]);
+ int32_t nrg = WEBRTC_SPL_MUL_16_16(out[0][k * L + n],
+ out[0][k * L + n]);
if (nrg > max_nrg)
{
max_nrg = nrg;
@@ -568,35 +567,20 @@
// iterate over samples
for (n = 0; n < L; n++)
{
- // For lower band
- tmp32 = out[n] * ((gain32 + 127) >> 7);
- out_tmp = tmp32 >> 16;
- if (out_tmp > 4095)
+ for (i = 0; i < num_bands; ++i)
{
- out[n] = (int16_t)32767;
- } else if (out_tmp < -4096)
- {
- out[n] = (int16_t)-32768;
- } else
- {
- tmp32 = out[n] * (gain32 >> 4);
- out[n] = (int16_t)(tmp32 >> 16);
- }
- // For higher band
- if (FS == 32000)
- {
- tmp32 = out_H[n] * ((gain32 + 127) >> 7);
+ tmp32 = out[i][n] * ((gain32 + 127) >> 7);
out_tmp = tmp32 >> 16;
if (out_tmp > 4095)
{
- out_H[n] = (int16_t)32767;
+ out[i][n] = (int16_t)32767;
} else if (out_tmp < -4096)
{
- out_H[n] = (int16_t)-32768;
+ out[i][n] = (int16_t)-32768;
} else
{
- tmp32 = out_H[n] * (gain32 >> 4);
- out_H[n] = (int16_t)(tmp32 >> 16);
+ tmp32 = out[i][n] * (gain32 >> 4);
+ out[i][n] = (int16_t)(tmp32 >> 16);
}
}
//
@@ -611,14 +595,10 @@
// iterate over samples
for (n = 0; n < L; n++)
{
- // For lower band
- tmp32 = out[k * L + n] * (gain32 >> 4);
- out[k * L + n] = (int16_t)(tmp32 >> 16);
- // For higher band
- if (FS == 32000)
+ for (i = 0; i < num_bands; ++i)
{
- tmp32 = out_H[k * L + n] * (gain32 >> 4);
- out_H[k * L + n] = (int16_t)(tmp32 >> 16);
+ tmp32 = out[i][k * L + n] * (gain32 >> 4);
+ out[i][k * L + n] = (int16_t)(tmp32 >> 16);
}
gain32 += delta;
}
diff --git a/webrtc/modules/audio_processing/agc/legacy/digital_agc.h b/webrtc/modules/audio_processing/agc/legacy/digital_agc.h
index 2d13650..c4316f1 100644
--- a/webrtc/modules/audio_processing/agc/legacy/digital_agc.h
+++ b/webrtc/modules/audio_processing/agc/legacy/digital_agc.h
@@ -55,8 +55,10 @@
int32_t WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, int16_t agcMode);
int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst,
- const int16_t *inNear, const int16_t *inNear_H,
- int16_t *out, int16_t *out_H, uint32_t FS,
+ const int16_t* const* inNear,
+ int16_t num_bands,
+ int16_t* const* out,
+ uint32_t FS,
int16_t lowLevelSignal);
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst,
diff --git a/webrtc/modules/audio_processing/agc/legacy/gain_control.h b/webrtc/modules/audio_processing/agc/legacy/gain_control.h
index 7eeb78b..b668be5 100644
--- a/webrtc/modules/audio_processing/agc/legacy/gain_control.h
+++ b/webrtc/modules/audio_processing/agc/legacy/gain_control.h
@@ -52,7 +52,8 @@
/*
* This function processes a 10 ms frame of far-end speech to determine
* if there is active speech. The length of the input speech vector must be
- * given in samples (80 when FS=8000, and 160 when FS=16000 or FS=32000).
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000).
*
* Input:
* - agcInst : AGC instance.
@@ -70,17 +71,17 @@
/*
* This function processes a 10 ms frame of microphone speech to determine
* if there is active speech. The length of the input speech vector must be
- * given in samples (80 when FS=8000, and 160 when FS=16000 or FS=32000). For
- * very low input levels, the input signal is increased in level by multiplying
- * and overwriting the samples in inMic[].
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000). For very low input levels, the input signal is increased in level
+ * by multiplying and overwriting the samples in inMic[].
*
* This function should be called before any further processing of the
* near-end microphone signal.
*
* Input:
* - agcInst : AGC instance.
- * - inMic : Microphone input speech vector for L band
- * - inMic_H : Microphone input speech vector for H band
+ * - inMic : Microphone input speech vector for each band
+ * - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
*
* Return value:
@@ -88,8 +89,8 @@
* : -1 - Error
*/
int WebRtcAgc_AddMic(void* agcInst,
- int16_t* inMic,
- int16_t* inMic_H,
+ int16_t* const* inMic,
+ int16_t num_bands,
int16_t samples);
/*
@@ -97,12 +98,12 @@
* It is a digital gain applied to the input signal and is used in the
* agcAdaptiveDigital mode where no microphone level is adjustable. The length
* of the input speech vector must be given in samples (80 when FS=8000, and 160
- * when FS=16000 or FS=32000).
+ * when FS=16000, FS=32000 or FS=48000).
*
* Input:
* - agcInst : AGC instance.
- * - inMic : Microphone input speech vector for L band
- * - inMic_H : Microphone input speech vector for H band
+ * - inMic : Microphone input speech vector for each band
+ * - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
* - micLevelIn : Input level of microphone (static)
*
@@ -116,8 +117,8 @@
* : -1 - Error
*/
int WebRtcAgc_VirtualMic(void* agcInst,
- int16_t* inMic,
- int16_t* inMic_H,
+ int16_t* const* inMic,
+ int16_t num_bands,
int16_t samples,
int32_t micLevelIn,
int32_t* micLevelOut);
@@ -126,16 +127,17 @@
* This function processes a 10 ms frame and adjusts (normalizes) the gain both
* analog and digitally. The gain adjustments are done only during active
* periods of speech. The length of the speech vectors must be given in samples
- * (80 when FS=8000, and 160 when FS=16000 or FS=32000). The echo parameter can
- * be used to ensure the AGC will not adjust upward in the presence of echo.
+ * (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
+ * parameter can be used to ensure the AGC will not adjust upward in the
+ * presence of echo.
*
* This function should be called after processing the near-end microphone
* signal, in any case after any echo cancellation.
*
* Input:
* - agcInst : AGC instance
- * - inNear : Near-end input speech vector for L band
- * - inNear_H : Near-end input speech vector for H band
+ * - inNear : Near-end input speech vector for each band
+ * - num_bands : Number of bands in input/output vector
* - samples : Number of samples in input/output vector
* - inMicLevel : Current microphone volume level
* - echo : Set to 0 if the signal passed to add_mic is
@@ -145,9 +147,8 @@
*
* Output:
* - outMicLevel : Adjusted microphone volume level
- * - out : Gain-adjusted near-end speech vector (L band)
+ * - out : Gain-adjusted near-end speech vector
* : May be the same vector as the input.
- * - out_H : Gain-adjusted near-end speech vector (H band)
* - saturationWarning : A returned value of 1 indicates a saturation event
* has occurred and the volume cannot be further
* reduced. Otherwise will be set to 0.
@@ -157,11 +158,10 @@
* : -1 - Error
*/
int WebRtcAgc_Process(void* agcInst,
- const int16_t* inNear,
- const int16_t* inNear_H,
+ const int16_t* const* inNear,
+ int16_t num_bands,
int16_t samples,
- int16_t* out,
- int16_t* out_H,
+ int16_t* const* out,
int32_t inMicLevel,
int32_t* outMicLevel,
int16_t echo,
diff --git a/webrtc/modules/audio_processing/gain_control_impl.cc b/webrtc/modules/audio_processing/gain_control_impl.cc
index 60744a8..10870e4 100644
--- a/webrtc/modules/audio_processing/gain_control_impl.cc
+++ b/webrtc/modules/audio_processing/gain_control_impl.cc
@@ -90,8 +90,8 @@
Handle* my_handle = static_cast<Handle*>(handle(i));
err = WebRtcAgc_AddMic(
my_handle,
- audio->split_bands(i)[kBand0To8kHz],
- audio->split_bands(i)[kBand8To16kHz],
+ audio->split_bands(i),
+ audio->num_bands(),
static_cast<int16_t>(audio->samples_per_split_channel()));
if (err != apm_->kNoError) {
@@ -106,8 +106,8 @@
err = WebRtcAgc_VirtualMic(
my_handle,
- audio->split_bands(i)[kBand0To8kHz],
- audio->split_bands(i)[kBand8To16kHz],
+ audio->split_bands(i),
+ audio->num_bands(),
static_cast<int16_t>(audio->samples_per_split_channel()),
analog_capture_level_,
&capture_level_out);
@@ -144,11 +144,10 @@
int err = WebRtcAgc_Process(
my_handle,
- audio->split_bands_const(i)[kBand0To8kHz],
- audio->split_bands_const(i)[kBand8To16kHz],
+ audio->split_bands_const(i),
+ audio->num_bands(),
static_cast<int16_t>(audio->samples_per_split_channel()),
- audio->split_bands(i)[kBand0To8kHz],
- audio->split_bands(i)[kBand8To16kHz],
+ audio->split_bands(i),
capture_levels_[i],
&capture_level_out,
apm_->echo_cancellation()->stream_has_echo(),