Add 48kHz support to AGC. Doing the same for the 16-24kHz band as was done for the 8-16kHz band. Results look and sound as nice. Originally reviewed here: https://webrtc-codereview.appspot.com/26339004/ BUG=webrtc:3146 R=andrew@webrtc.org, kwiberg@webrtc.org Review URL: https://webrtc-codereview.appspot.com/28299004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7917 4adac7df-926f-26a2-2b94-8c16560cd09d

commit: cf6d0b64ef8f1d4025f28fd55723be737155ceac [log] [tgz]
author: aluebs@webrtc.org <aluebs@webrtc.org> Tue Dec 16 20:56:09 2014 +0000
committer: aluebs@webrtc.org <aluebs@webrtc.org> Tue Dec 16 20:56:09 2014 +0000
tree: 7a0a3b70cb0abfc488c48cbd2203242ab0d6cb1b
parent: 2510d11c0f69c8cf840279da6593ec34a80a9b0c [diff]
diff --git a/webrtc/modules/audio_processing/agc/legacy/analog_agc.c b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c
index 7340525..9a207c3 100644
--- a/webrtc/modules/audio_processing/agc/legacy/analog_agc.c
+++ b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c

@@ -112,7 +112,7 @@
         6726, 5343, 4244, 3371, 2678, 2127, 1690, 1342, 1066, 847, 673, 534, 424, 337, 268,
         213, 169, 134, 107, 85, 67};
 
-int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
+int WebRtcAgc_AddMic(void *state, int16_t* const* in_mic, int16_t num_bands,
                      int16_t samples)
 {
     int32_t nrg, max_nrg, sample, tmp32;
@@ -134,17 +134,6 @@
         }
     }
 
-    /* Check for valid pointers based on sampling rate */
-    if ((stt->fs == 32000) && (in_mic_H == NULL))
-    {
-        return -1;
-    }
-    /* Check for valid pointer for low band */
-    if (in_mic == NULL)
-    {
-        return -1;
-    }
-
     /* apply slowly varying digital gain */
     if (stt->micVol > stt->maxAnalog)
     {
@@ -175,32 +164,19 @@
 
         for (i = 0; i < samples; i++)
         {
-            // For lower band
-            sample = (in_mic[i] * gain) >> 12;
-            if (sample > 32767)
+            int j;
+            for (j = 0; j < num_bands; ++j)
             {
-                in_mic[i] = 32767;
-            } else if (sample < -32768)
-            {
-                in_mic[i] = -32768;
-            } else
-            {
-                in_mic[i] = (int16_t)sample;
-            }
-
-            // For higher band
-            if (stt->fs == 32000)
-            {
-                sample = (in_mic_H[i] * gain) >> 12;
+                sample = (in_mic[j][i] * gain) >> 12;
                 if (sample > 32767)
                 {
-                    in_mic_H[i] = 32767;
+                    in_mic[j][i] = 32767;
                 } else if (sample < -32768)
                 {
-                    in_mic_H[i] = -32768;
+                    in_mic[j][i] = -32768;
                 } else
                 {
-                    in_mic_H[i] = (int16_t)sample;
+                    in_mic[j][i] = (int16_t)sample;
                 }
             }
         }
@@ -224,7 +200,8 @@
         max_nrg = 0;
         for (n = 0; n < L; n++)
         {
-            nrg = WEBRTC_SPL_MUL_16_16(in_mic[i * L + n], in_mic[i * L + n]);
+            nrg = WEBRTC_SPL_MUL_16_16(in_mic[0][i * L + n],
+                                       in_mic[0][i * L + n]);
             if (nrg > max_nrg)
             {
                 max_nrg = nrg;
@@ -246,10 +223,13 @@
     {
         if (stt->fs == 16000)
         {
-            WebRtcSpl_DownsampleBy2(&in_mic[i * 32], 32, tmp_speech, stt->filterState);
+            WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32],
+                                    32,
+                                    tmp_speech,
+                                    stt->filterState);
         } else
         {
-            memcpy(tmp_speech, &in_mic[i * 16], 16 * sizeof(short));
+            memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(short));
         }
         /* Compute energy in blocks of 16 samples */
         ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4);
@@ -265,7 +245,7 @@
     }
 
     /* call VAD (use low band only) */
-    WebRtcAgc_ProcessVad(&stt->vadMic, in_mic, samples);
+    WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples);
 
     return 0;
 }
@@ -286,7 +266,7 @@
         {
             return -1;
         }
-    } else if (stt->fs == 16000 || stt->fs == 32000)
+    } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000)
     {
         if (samples != 160)
         {
@@ -300,13 +280,13 @@
     return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples);
 }
 
-int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
-                         int16_t samples, int32_t micLevelIn,
+int WebRtcAgc_VirtualMic(void *agcInst, int16_t* const* in_near,
+                         int16_t num_bands, int16_t samples, int32_t micLevelIn,
                          int32_t *micLevelOut)
 {
     int32_t tmpFlt, micLevelTmp, gainIdx;
     uint16_t gain;
-    int16_t ii;
+    int16_t ii, j;
     Agc_t *stt;
 
     uint32_t nrg;
@@ -329,7 +309,7 @@
         frameNrgLimit = frameNrgLimit << 1;
     }
 
-    frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0], in_near[0]);
+    frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0][0], in_near[0][0]);
     for (sampleCntr = 1; sampleCntr < samples; sampleCntr++)
     {
 
@@ -337,12 +317,14 @@
         // the correct value of the energy is not important
         if (frameNrg < frameNrgLimit)
         {
-            nrg = WEBRTC_SPL_MUL_16_16(in_near[sampleCntr], in_near[sampleCntr]);
+            nrg = WEBRTC_SPL_MUL_16_16(in_near[0][sampleCntr],
+                                       in_near[0][sampleCntr]);
             frameNrg += nrg;
         }
 
         // Count the zero crossings
-        numZeroCrossing += ((in_near[sampleCntr] ^ in_near[sampleCntr - 1]) < 0);
+        numZeroCrossing +=
+                ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0);
     }
 
     if ((frameNrg < 500) || (numZeroCrossing <= 5))
@@ -389,7 +371,7 @@
     }
     for (ii = 0; ii < samples; ii++)
     {
-        tmpFlt = (in_near[ii] * gain) >> 10;
+        tmpFlt = (in_near[0][ii] * gain) >> 10;
         if (tmpFlt > 32767)
         {
             tmpFlt = 32767;
@@ -414,10 +396,10 @@
                 gain = kSuppressionTableVirtualMic[127 - gainIdx];
             }
         }
-        in_near[ii] = (int16_t)tmpFlt;
-        if (stt->fs == 32000)
+        in_near[0][ii] = (int16_t)tmpFlt;
+        for (j = 1; j < num_bands; ++j)
         {
-            tmpFlt = (in_near_H[ii] * gain) >> 10;
+            tmpFlt = (in_near[j][ii] * gain) >> 10;
             if (tmpFlt > 32767)
             {
                 tmpFlt = 32767;
@@ -426,7 +408,7 @@
             {
                 tmpFlt = -32768;
             }
-            in_near_H[ii] = (int16_t)tmpFlt;
+            in_near[j][ii] = (int16_t)tmpFlt;
         }
     }
     /* Set the level we (finally) used */
@@ -434,7 +416,7 @@
 //    *micLevelOut = stt->micGainIdx;
     *micLevelOut = stt->micGainIdx >> stt->scale;
     /* Add to Mic as if it was the output from a true microphone */
-    if (WebRtcAgc_AddMic(agcInst, in_near, in_near_H, samples) != 0)
+    if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0)
     {
         return -1;
     }
@@ -1158,9 +1140,9 @@
     return 0;
 }
 
-int WebRtcAgc_Process(void *agcInst, const int16_t *in_near,
-                      const int16_t *in_near_H, int16_t samples,
-                      int16_t *out, int16_t *out_H, int32_t inMicLevel,
+int WebRtcAgc_Process(void *agcInst, const int16_t* const* in_near,
+                      int16_t num_bands, int16_t samples,
+                      int16_t* const* out, int32_t inMicLevel,
                       int32_t *outMicLevel, int16_t echo,
                       uint8_t *saturationWarning)
 {
@@ -1182,7 +1164,7 @@
         {
             return -1;
         }
-    } else if (stt->fs == 16000 || stt->fs == 32000)
+    } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000)
     {
         if (samples != 160)
         {
@@ -1193,17 +1175,6 @@
         return -1;
     }
 
-    /* Check for valid pointers based on sampling rate */
-    if (stt->fs == 32000 && in_near_H == NULL)
-    {
-        return -1;
-    }
-    /* Check for valid pointers for low band */
-    if (in_near == NULL)
-    {
-        return -1;
-    }
-
     *saturationWarning = 0;
     //TODO: PUT IN RANGE CHECKING FOR INPUT LEVELS
     *outMicLevel = inMicLevel;
@@ -1214,9 +1185,8 @@
 
     if (WebRtcAgc_ProcessDigital(&stt->digitalAgc,
                                  in_near,
-                                 in_near_H,
+                                 num_bands,
                                  out,
-                                 out_H,
                                  stt->fs,
                                  stt->lowLevelSignal) == -1)
     {

diff --git a/webrtc/modules/audio_processing/agc/legacy/digital_agc.c b/webrtc/modules/audio_processing/agc/legacy/digital_agc.c
index 366cd5d..81e012e 100644
--- a/webrtc/modules/audio_processing/agc/legacy/digital_agc.c
+++ b/webrtc/modules/audio_processing/agc/legacy/digital_agc.c

@@ -293,9 +293,11 @@
     return 0;
 }
 
-int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
-                                 const int16_t *in_near_H, int16_t *out,
-                                 int16_t *out_H, uint32_t FS,
+int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt,
+                                 const int16_t* const* in_near,
+                                 int16_t num_bands,
+                                 int16_t* const* out,
+                                 uint32_t FS,
                                  int16_t lowlevelSignal)
 {
     // array for gains (one value per ms, incl start & end)
@@ -303,7 +305,7 @@
 
     int32_t out_tmp, tmp32;
     int32_t env[10];
-    int32_t nrg, max_nrg;
+    int32_t max_nrg;
     int32_t cur_level;
     int32_t gain32, delta;
     int16_t logratio;
@@ -311,7 +313,7 @@
     int16_t zeros = 0, zeros_fast, frac = 0;
     int16_t decay;
     int16_t gate, gain_adj;
-    int16_t k, n;
+    int16_t k, n, i;
     int16_t L, L2; // samples/subframe
 
     // determine number of samples per ms
@@ -319,7 +321,7 @@
     {
         L = 8;
         L2 = 3;
-    } else if (FS == 16000 || FS == 32000)
+    } else if (FS == 16000 || FS == 32000 || FS == 48000)
     {
         L = 16;
         L2 = 4;
@@ -328,20 +330,16 @@
         return -1;
     }
 
-    if (in_near != out)
+    for (i = 0; i < num_bands; ++i)
     {
-        // Only needed if they don't already point to the same place.
-        memcpy(out, in_near, 10 * L * sizeof(int16_t));
-    }
-    if (FS == 32000)
-    {
-        if (in_near_H != out_H)
+        if (in_near[i] != out[i])
         {
-            memcpy(out_H, in_near_H, 10 * L * sizeof(int16_t));
+            // Only needed if they don't already point to the same place.
+            memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
         }
     }
     // VAD for near end
-    logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10);
+    logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10);
 
     // Account for far end VAD
     if (stt->vadFarend.counter > 10)
@@ -407,7 +405,8 @@
         max_nrg = 0;
         for (n = 0; n < L; n++)
         {
-            nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]);
+            int32_t nrg = WEBRTC_SPL_MUL_16_16(out[0][k * L + n],
+                                               out[0][k * L + n]);
             if (nrg > max_nrg)
             {
                 max_nrg = nrg;
@@ -568,35 +567,20 @@
     // iterate over samples
     for (n = 0; n < L; n++)
     {
-        // For lower band
-        tmp32 = out[n] * ((gain32 + 127) >> 7);
-        out_tmp = tmp32 >> 16;
-        if (out_tmp > 4095)
+        for (i = 0; i < num_bands; ++i)
         {
-            out[n] = (int16_t)32767;
-        } else if (out_tmp < -4096)
-        {
-            out[n] = (int16_t)-32768;
-        } else
-        {
-            tmp32 = out[n] * (gain32 >> 4);
-            out[n] = (int16_t)(tmp32 >> 16);
-        }
-        // For higher band
-        if (FS == 32000)
-        {
-            tmp32 = out_H[n] * ((gain32 + 127) >> 7);
+            tmp32 = out[i][n] * ((gain32 + 127) >> 7);
             out_tmp = tmp32 >> 16;
             if (out_tmp > 4095)
             {
-                out_H[n] = (int16_t)32767;
+                out[i][n] = (int16_t)32767;
             } else if (out_tmp < -4096)
             {
-                out_H[n] = (int16_t)-32768;
+                out[i][n] = (int16_t)-32768;
             } else
             {
-                tmp32 = out_H[n] * (gain32 >> 4);
-                out_H[n] = (int16_t)(tmp32 >> 16);
+                tmp32 = out[i][n] * (gain32 >> 4);
+                out[i][n] = (int16_t)(tmp32 >> 16);
             }
         }
         //
@@ -611,14 +595,10 @@
         // iterate over samples
         for (n = 0; n < L; n++)
         {
-            // For lower band
-            tmp32 = out[k * L + n] * (gain32 >> 4);
-            out[k * L + n] = (int16_t)(tmp32 >> 16);
-            // For higher band
-            if (FS == 32000)
+            for (i = 0; i < num_bands; ++i)
             {
-                tmp32 = out_H[k * L + n] * (gain32 >> 4);
-                out_H[k * L + n] = (int16_t)(tmp32 >> 16);
+                tmp32 = out[i][k * L + n] * (gain32 >> 4);
+                out[i][k * L + n] = (int16_t)(tmp32 >> 16);
             }
             gain32 += delta;
         }

diff --git a/webrtc/modules/audio_processing/agc/legacy/digital_agc.h b/webrtc/modules/audio_processing/agc/legacy/digital_agc.h
index 2d13650..c4316f1 100644
--- a/webrtc/modules/audio_processing/agc/legacy/digital_agc.h
+++ b/webrtc/modules/audio_processing/agc/legacy/digital_agc.h

@@ -55,8 +55,10 @@
 int32_t WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, int16_t agcMode);
 
 int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst,
-                                 const int16_t *inNear, const int16_t *inNear_H,
-                                 int16_t *out, int16_t *out_H, uint32_t FS,
+                                 const int16_t* const* inNear,
+                                 int16_t num_bands,
+                                 int16_t* const* out,
+                                 uint32_t FS,
                                  int16_t lowLevelSignal);
 
 int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst,

diff --git a/webrtc/modules/audio_processing/agc/legacy/gain_control.h b/webrtc/modules/audio_processing/agc/legacy/gain_control.h
index 7eeb78b..b668be5 100644
--- a/webrtc/modules/audio_processing/agc/legacy/gain_control.h
+++ b/webrtc/modules/audio_processing/agc/legacy/gain_control.h

@@ -52,7 +52,8 @@
 /*
  * This function processes a 10 ms frame of far-end speech to determine
  * if there is active speech. The length of the input speech vector must be
- * given in samples (80 when FS=8000, and 160 when FS=16000 or FS=32000).
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000).
  *
  * Input:
  *      - agcInst           : AGC instance.
@@ -70,17 +71,17 @@
 /*
  * This function processes a 10 ms frame of microphone speech to determine
  * if there is active speech. The length of the input speech vector must be
- * given in samples (80 when FS=8000, and 160 when FS=16000 or FS=32000). For
- * very low input levels, the input signal is increased in level by multiplying
- * and overwriting the samples in inMic[].
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000). For very low input levels, the input signal is increased in level
+ * by multiplying and overwriting the samples in inMic[].
  *
  * This function should be called before any further processing of the
  * near-end microphone signal.
  *
  * Input:
  *      - agcInst           : AGC instance.
- *      - inMic             : Microphone input speech vector for L band
- *      - inMic_H           : Microphone input speech vector for H band
+ *      - inMic             : Microphone input speech vector for each band
+ *      - num_bands         : Number of bands in input vector
  *      - samples           : Number of samples in input vector
  *
  * Return value:
@@ -88,8 +89,8 @@
  *                          : -1 - Error
  */
 int WebRtcAgc_AddMic(void* agcInst,
-                     int16_t* inMic,
-                     int16_t* inMic_H,
+                     int16_t* const* inMic,
+                     int16_t num_bands,
                      int16_t samples);
 
 /*
@@ -97,12 +98,12 @@
  * It is a digital gain applied to the input signal and is used in the
  * agcAdaptiveDigital mode where no microphone level is adjustable. The length
  * of the input speech vector must be given in samples (80 when FS=8000, and 160
- * when FS=16000 or FS=32000).
+ * when FS=16000, FS=32000 or FS=48000).
  *
  * Input:
  *      - agcInst           : AGC instance.
- *      - inMic             : Microphone input speech vector for L band
- *      - inMic_H           : Microphone input speech vector for H band
+ *      - inMic             : Microphone input speech vector for each band
+ *      - num_bands         : Number of bands in input vector
  *      - samples           : Number of samples in input vector
  *      - micLevelIn        : Input level of microphone (static)
  *
@@ -116,8 +117,8 @@
  *                          : -1 - Error
  */
 int WebRtcAgc_VirtualMic(void* agcInst,
-                         int16_t* inMic,
-                         int16_t* inMic_H,
+                         int16_t* const* inMic,
+                         int16_t num_bands,
                          int16_t samples,
                          int32_t micLevelIn,
                          int32_t* micLevelOut);
@@ -126,16 +127,17 @@
  * This function processes a 10 ms frame and adjusts (normalizes) the gain both
  * analog and digitally. The gain adjustments are done only during active
  * periods of speech. The length of the speech vectors must be given in samples
- * (80 when FS=8000, and 160 when FS=16000 or FS=32000). The echo parameter can
- * be used to ensure the AGC will not adjust upward in the presence of echo.
+ * (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
+ * parameter can be used to ensure the AGC will not adjust upward in the
+ * presence of echo.
  *
  * This function should be called after processing the near-end microphone
  * signal, in any case after any echo cancellation.
  *
  * Input:
  *      - agcInst           : AGC instance
- *      - inNear            : Near-end input speech vector for L band
- *      - inNear_H          : Near-end input speech vector for H band
+ *      - inNear            : Near-end input speech vector for each band
+ *      - num_bands         : Number of bands in input/output vector
  *      - samples           : Number of samples in input/output vector
  *      - inMicLevel        : Current microphone volume level
  *      - echo              : Set to 0 if the signal passed to add_mic is
@@ -145,9 +147,8 @@
  *
  * Output:
  *      - outMicLevel       : Adjusted microphone volume level
- *      - out               : Gain-adjusted near-end speech vector (L band)
+ *      - out               : Gain-adjusted near-end speech vector
  *                          : May be the same vector as the input.
- *      - out_H             : Gain-adjusted near-end speech vector (H band)
  *      - saturationWarning : A returned value of 1 indicates a saturation event
  *                            has occurred and the volume cannot be further
  *                            reduced. Otherwise will be set to 0.
@@ -157,11 +158,10 @@
  *                          : -1 - Error
  */
 int WebRtcAgc_Process(void* agcInst,
-                      const int16_t* inNear,
-                      const int16_t* inNear_H,
+                      const int16_t* const* inNear,
+                      int16_t num_bands,
                       int16_t samples,
-                      int16_t* out,
-                      int16_t* out_H,
+                      int16_t* const* out,
                       int32_t inMicLevel,
                       int32_t* outMicLevel,
                       int16_t echo,

diff --git a/webrtc/modules/audio_processing/gain_control_impl.cc b/webrtc/modules/audio_processing/gain_control_impl.cc
index 60744a8..10870e4 100644
--- a/webrtc/modules/audio_processing/gain_control_impl.cc
+++ b/webrtc/modules/audio_processing/gain_control_impl.cc

@@ -90,8 +90,8 @@
       Handle* my_handle = static_cast<Handle*>(handle(i));
       err = WebRtcAgc_AddMic(
           my_handle,
-          audio->split_bands(i)[kBand0To8kHz],
-          audio->split_bands(i)[kBand8To16kHz],
+          audio->split_bands(i),
+          audio->num_bands(),
           static_cast<int16_t>(audio->samples_per_split_channel()));
 
       if (err != apm_->kNoError) {
@@ -106,8 +106,8 @@
 
       err = WebRtcAgc_VirtualMic(
           my_handle,
-          audio->split_bands(i)[kBand0To8kHz],
-          audio->split_bands(i)[kBand8To16kHz],
+          audio->split_bands(i),
+          audio->num_bands(),
           static_cast<int16_t>(audio->samples_per_split_channel()),
           analog_capture_level_,
           &capture_level_out);
@@ -144,11 +144,10 @@
 
     int err = WebRtcAgc_Process(
         my_handle,
-        audio->split_bands_const(i)[kBand0To8kHz],
-        audio->split_bands_const(i)[kBand8To16kHz],
+        audio->split_bands_const(i),
+        audio->num_bands(),
         static_cast<int16_t>(audio->samples_per_split_channel()),
-        audio->split_bands(i)[kBand0To8kHz],
-        audio->split_bands(i)[kBand8To16kHz],
+        audio->split_bands(i),
         capture_levels_[i],
         &capture_level_out,
         apm_->echo_cancellation()->stream_has_echo(),
commit	cf6d0b64ef8f1d4025f28fd55723be737155ceac	[log] [tgz]
author	aluebs@webrtc.org <aluebs@webrtc.org>	Tue Dec 16 20:56:09 2014 +0000
committer	aluebs@webrtc.org <aluebs@webrtc.org>	Tue Dec 16 20:56:09 2014 +0000
tree	7a0a3b70cb0abfc488c48cbd2203242ab0d6cb1b
parent	2510d11c0f69c8cf840279da6593ec34a80a9b0c [diff]