libmp3lame/vbrquantize.c - platform/external/lame - Git at Google

 /*
  *	MP3 quantization
  *
  *	Copyright (c) 1999-2000 Mark Taylor
  *	Copyright (c) 2000-2007 Robert Hegemann
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 02111-1307, USA.
  */

 /* $Id: vbrquantize.c,v 1.132.2.1 2008/08/05 14:16:07 robert Exp $ */

 #ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif


 #include "lame.h"
 #include "machine.h"
 #include "encoder.h"
 #include "util.h"
 #include "vbrquantize.h"
 #include "quantize_pvt.h"


 struct algo_s;
 typedef struct algo_s algo_t;

 typedef void (*alloc_sf_f) (const algo_t *, const int *, const int *, int);

 struct algo_s {
     alloc_sf_f alloc;
     const FLOAT *xr34orig;
     lame_internal_flags *gfc;
     gr_info *cod_info;
     int     mingain_l;
     int     mingain_s[3];
 };


 /*  Remarks on optimizing compilers:
  *
  *  the MSVC compiler may get into aliasing problems when accessing
  *  memory through the fi_union. declaring it volatile does the trick here
  *
  *  the calc_sfb_noise_* functions are not inlined because the intel compiler
  *  optimized executeables won't work as expected anymore
  */

 #ifdef _MSC_VER
 #  define VOLATILE volatile
 #else
 #  define VOLATILE
 #endif

 typedef VOLATILE union {
     float   f;
     int     i;
 } fi_union;


 #define DOUBLEX double

 #define MAGIC_FLOAT_def (65536*(128))
 #define MAGIC_INT_def    0x4b000000

 #ifdef TAKEHIRO_IEEE754_HACK
 #  define ROUNDFAC_def -0.0946f
 #else
 /*********************************************************************
  * XRPOW_FTOI is a macro to convert floats to ints.
  * if XRPOW_FTOI(x) = nearest_int(x), then QUANTFAC(x)=adj43asm[x]
  *                                         ROUNDFAC= -0.0946
  *
  * if XRPOW_FTOI(x) = floor(x), then QUANTFAC(x)=asj43[x]
  *                                   ROUNDFAC=0.4054
  *********************************************************************/
 #  define QUANTFAC(rx)  adj43[rx]
 #  define ROUNDFAC_def 0.4054f
 #  define XRPOW_FTOI(src,dest) ((dest) = (int)(src))
 #endif

 static int const MAGIC_INT = MAGIC_INT_def;
 #ifndef TAKEHIRO_IEEE754_HACK
 static DOUBLEX const ROUNDFAC = ROUNDFAC_def;
 #endif
 static DOUBLEX const MAGIC_FLOAT = MAGIC_FLOAT_def;


 static  FLOAT
 max_x34(const FLOAT * xr34, unsigned int bw)
 {
     FLOAT   xfsf = 0;
     unsigned int j = bw >> 1;
     unsigned int const remaining = (j & 0x01u);

     for (j >>= 1; j > 0; --j) {
         if (xfsf < xr34[0]) {
             xfsf = xr34[0];
         }
         if (xfsf < xr34[1]) {
             xfsf = xr34[1];
         }
         if (xfsf < xr34[2]) {
             xfsf = xr34[2];
         }
         if (xfsf < xr34[3]) {
             xfsf = xr34[3];
         }
         xr34 += 4;
     }
     if (remaining) {
         if (xfsf < xr34[0]) {
             xfsf = xr34[0];
         }
         if (xfsf < xr34[1]) {
             xfsf = xr34[1];
         }
     }
     return xfsf;
 }


 static  uint8_t
 find_lowest_scalefac(const FLOAT xr34)
 {
     uint8_t sf_ok = 255;
     uint8_t sf = 128, delsf = 64;
     uint8_t i;
     for (i = 0; i < 8; ++i) {
         FLOAT const xfsf = ipow20[sf] * xr34;
         if (xfsf <= IXMAX_VAL) {
             sf_ok = sf;
             sf -= delsf;
         }
         else {
             sf += delsf;
         }
         delsf >>= 1;
     }
     return sf_ok;
 }


 static int
 below_noise_floor(const FLOAT * xr, FLOAT l3xmin, unsigned int bw)
 {
     FLOAT   sum = 0.0;
     unsigned int i, j;
     for (i = 0, j = bw; j > 0; ++i, --j) {
         FLOAT const x = xr[i];
         sum += x * x;
     }
     return (l3xmin - sum) >= -1E-20 ? 1 : 0;
 }


 static void
 k_34_4(DOUBLEX x[4], int l3[4])
 {
 #ifdef TAKEHIRO_IEEE754_HACK
     fi_union fi[4];

     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL && x[2] <= IXMAX_VAL && x[3] <= IXMAX_VAL);
     x[0] += MAGIC_FLOAT;
     fi[0].f = x[0];
     x[1] += MAGIC_FLOAT;
     fi[1].f = x[1];
     x[2] += MAGIC_FLOAT;
     fi[2].f = x[2];
     x[3] += MAGIC_FLOAT;
     fi[3].f = x[3];
     fi[0].f = x[0] + adj43asm[fi[0].i - MAGIC_INT];
     fi[1].f = x[1] + adj43asm[fi[1].i - MAGIC_INT];
     fi[2].f = x[2] + adj43asm[fi[2].i - MAGIC_INT];
     fi[3].f = x[3] + adj43asm[fi[3].i - MAGIC_INT];
     l3[0] = fi[0].i - MAGIC_INT;
     l3[1] = fi[1].i - MAGIC_INT;
     l3[2] = fi[2].i - MAGIC_INT;
     l3[3] = fi[3].i - MAGIC_INT;
 #else
     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL && x[2] <= IXMAX_VAL && x[3] <= IXMAX_VAL);
     XRPOW_FTOI(x[0], l3[0]);
     XRPOW_FTOI(x[1], l3[1]);
     XRPOW_FTOI(x[2], l3[2]);
     XRPOW_FTOI(x[3], l3[3]);
     x[0] += QUANTFAC(l3[0]);
     x[1] += QUANTFAC(l3[1]);
     x[2] += QUANTFAC(l3[2]);
     x[3] += QUANTFAC(l3[3]);
     XRPOW_FTOI(x[0], l3[0]);
     XRPOW_FTOI(x[1], l3[1]);
     XRPOW_FTOI(x[2], l3[2]);
     XRPOW_FTOI(x[3], l3[3]);
 #endif
 }


 static void
 k_34_2(DOUBLEX x[2], int l3[2])
 {
 #ifdef TAKEHIRO_IEEE754_HACK
     fi_union fi[2];

     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL);
     x[0] += MAGIC_FLOAT;
     fi[0].f = x[0];
     x[1] += MAGIC_FLOAT;
     fi[1].f = x[1];
     fi[0].f = x[0] + adj43asm[fi[0].i - MAGIC_INT];
     fi[1].f = x[1] + adj43asm[fi[1].i - MAGIC_INT];
     l3[0] = fi[0].i - MAGIC_INT;
     l3[1] = fi[1].i - MAGIC_INT;
 #else
     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL);
     XRPOW_FTOI(x[0], l3[0]);
     XRPOW_FTOI(x[1], l3[1]);
     x[0] += QUANTFAC(l3[0]);
     x[1] += QUANTFAC(l3[1]);
     XRPOW_FTOI(x[0], l3[0]);
     XRPOW_FTOI(x[1], l3[1]);
 #endif
 }


 /*  do call the calc_sfb_noise_* functions only with sf values
  *  for which holds: sfpow34*xr34 <= IXMAX_VAL
  */

 static  FLOAT
 calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, unsigned int bw, uint8_t sf)
 {
     DOUBLEX x[4];
     int     l3[4];
     const FLOAT sfpow = pow20[sf + Q_MAX2]; /*pow(2.0,sf/4.0); */
     const FLOAT sfpow34 = ipow20[sf]; /*pow(sfpow,-3.0/4.0); */

     FLOAT   xfsf = 0;
     unsigned int j = bw >> 1;
     unsigned int const remaining = (j & 0x01u);

     for (j >>= 1; j > 0; --j) {
         x[0] = sfpow34 * xr34[0];
         x[1] = sfpow34 * xr34[1];
         x[2] = sfpow34 * xr34[2];
         x[3] = sfpow34 * xr34[3];

         k_34_4(x, l3);

         x[0] = fabs(xr[0]) - sfpow * pow43[l3[0]];
         x[1] = fabs(xr[1]) - sfpow * pow43[l3[1]];
         x[2] = fabs(xr[2]) - sfpow * pow43[l3[2]];
         x[3] = fabs(xr[3]) - sfpow * pow43[l3[3]];
         xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);

         xr += 4;
         xr34 += 4;
     }
     if (remaining) {
         x[0] = sfpow34 * xr34[0];
         x[1] = sfpow34 * xr34[1];

         k_34_2(x, l3);

         x[0] = fabs(xr[0]) - sfpow * pow43[l3[0]];
         x[1] = fabs(xr[1]) - sfpow * pow43[l3[1]];
         xfsf += x[0] * x[0] + x[1] * x[1];
     }
     return xfsf;
 }


 struct calc_noise_cache {
     int     valid;
     FLOAT   value;
 };

 typedef struct calc_noise_cache calc_noise_cache_t;


 static  uint8_t
 tri_calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw,
                        uint8_t sf, calc_noise_cache_t * did_it)
 {
     if (did_it[sf].valid == 0) {
         did_it[sf].valid = 1;
         did_it[sf].value = calc_sfb_noise_x34(xr, xr34, bw, sf);
     }
     if (l3_xmin < did_it[sf].value) {
         return 1;
     }
     if (sf < 255) {
         uint8_t const sf_x = sf + 1;
         if (did_it[sf_x].valid == 0) {
             did_it[sf_x].valid = 1;
             did_it[sf_x].value = calc_sfb_noise_x34(xr, xr34, bw, sf_x);
         }
         if (l3_xmin < did_it[sf_x].value) {
             return 1;
         }
     }
     if (sf > 0) {
         uint8_t const sf_x = sf - 1;
         if (did_it[sf_x].valid == 0) {
             did_it[sf_x].valid = 1;
             did_it[sf_x].value = calc_sfb_noise_x34(xr, xr34, bw, sf_x);
         }
         if (l3_xmin < did_it[sf_x].value) {
             return 1;
         }
     }
     return 0;
 }

 #if 0
 /**
  *  Robert Hegemann 2001-05-01
  *  calculates quantization step size determined by allowed masking
  */
 static int
 calc_scalefac(FLOAT8 l3_xmin, int bw)
 {
     FLOAT8 const c = 5.799142446; /* 10 * 10^(2/3) * log10(4/3) */
     return 210 + (int) (c * log10(l3_xmin / bw) - .5);
 }
 #endif

 /* the find_scalefac* routines calculate
  * a quantization step size which would
  * introduce as much noise as is allowed.
  * The larger the step size the more
  * quantization noise we'll get. The
  * scalefactors are there to lower the
  * global step size, allowing limited
  * differences in quantization step sizes
  * per band (shaping the noise).
  */

 static  uint8_t
 find_scalefac_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw,
                   uint8_t sf_min)
 {
     calc_noise_cache_t did_it[256];
     uint8_t sf = 128, sf_ok = 255, delsf = 128, seen_good_one = 0, i;
     memset(did_it, 0, sizeof(did_it));
     for (i = 0; i < 8; ++i) {
         delsf >>= 1;
         if (sf <= sf_min) {
             sf += delsf;
         }
         else {
             uint8_t const bad = tri_calc_sfb_noise_x34(xr, xr34, l3_xmin, bw, sf, did_it);
             if (bad) {  /* distortion.  try a smaller scalefactor */
                 sf -= delsf;
             }
             else {
                 sf_ok = sf;
                 sf += delsf;
                 seen_good_one = 1;
             }
         }
     }
     /*  returning a scalefac without distortion, if possible
      */
     if (seen_good_one > 0) {
         return sf_ok;
     }
     if (sf <= sf_min) {
         return sf_min;
     }
     return sf;
 }


 /***********************************************************************
  *
  *      calc_short_block_vbr_sf()
  *      calc_long_block_vbr_sf()
  *
  *  Mark Taylor 2000-??-??
  *  Robert Hegemann 2000-10-25 made functions of it
  *
  ***********************************************************************/

 /* a variation for vbr-mtrh */
 static int
 block_sf(algo_t * that, const FLOAT l3_xmin[SFBMAX], int vbrsf[SFBMAX], int vbrsfmin[SFBMAX])
 {
     FLOAT   max_xr34;
     const FLOAT *const xr = &that->cod_info->xr[0];
     const FLOAT *const xr34_orig = &that->xr34orig[0];
     const int *const width = &that->cod_info->width[0];
     unsigned int const max_nonzero_coeff = (unsigned int) that->cod_info->max_nonzero_coeff;
     uint8_t maxsf = 0;
     int     sfb = 0;
     unsigned int j = 0, i = 0;
     int const psymax = that->cod_info->psymax;

     assert(that->cod_info->max_nonzero_coeff >= 0);

     that->mingain_l = 0;
     that->mingain_s[0] = 0;
     that->mingain_s[1] = 0;
     that->mingain_s[2] = 0;
     while (j <= max_nonzero_coeff) {
         unsigned int const w = (unsigned int) width[sfb];
         unsigned int const m = (unsigned int) (max_nonzero_coeff - j + 1);
         unsigned int l = w;
         uint8_t m1, m2;
         if (l > m) {
             l = m;
         }
         max_xr34 = max_x34(&xr34_orig[j], l);

         m1 = find_lowest_scalefac(max_xr34);
         vbrsfmin[sfb] = m1;
         if (that->mingain_l < m1) {
             that->mingain_l = m1;
         }
         if (that->mingain_s[i] < m1) {
             that->mingain_s[i] = m1;
         }
         if (++i > 2) {
             i = 0;
         }
         if (sfb < psymax) {
             if (below_noise_floor(&xr[j], l3_xmin[sfb], l) == 0) {
                 m2 = find_scalefac_x34(&xr[j], &xr34_orig[j], l3_xmin[sfb], l, m1);
 #if 0
                 if (0) {
                     /** Robert Hegemann 2007-09-29:
                      *  It seems here is some more potential for speed improvements.
                      *  Current find method does 11-18 quantization calculations.
                      *  Using a "good guess" may help to reduce this amount.
                      */
                     int     guess = calc_scalefac(l3_xmin[sfb], l);
                     DEBUGF(that->gfc, "sfb=%3d guess=%3d found=%3d diff=%3d\n", sfb, guess, m2,
                            m2 - guess);
                 }
 #endif
                 if (maxsf < m2) {
                     maxsf = m2;
                 }
             }
             else {
                 m2 = 255;
                 maxsf = 255;
             }
         }
         else {
             if (maxsf < m1) {
                 maxsf = m1;
             }
             m2 = maxsf;
         }
         vbrsf[sfb] = m2;
         ++sfb;
         j += w;
     }
     for (; sfb < SFBMAX; ++sfb) {
         vbrsf[sfb] = maxsf;
         vbrsfmin[sfb] = 0;
     }
     return maxsf;
 }


 /***********************************************************************
  *
  *  quantize xr34 based on scalefactors
  *
  *  block_xr34
  *
  *  Mark Taylor 2000-??-??
  *  Robert Hegemann 2000-10-20 made functions of them
  *
  ***********************************************************************/

 static void
 quantize_x34(const algo_t * that)
 {
     DOUBLEX x[4];
     const FLOAT *xr34_orig = that->xr34orig;
     gr_info *const cod_info = that->cod_info;
     int const ifqstep = (cod_info->scalefac_scale == 0) ? 2 : 4;
     int    *l3 = cod_info->l3_enc;
     unsigned int j = 0, sfb = 0;
     unsigned int const max_nonzero_coeff = (unsigned int) cod_info->max_nonzero_coeff;

     assert(cod_info->max_nonzero_coeff >= 0);
     assert(cod_info->max_nonzero_coeff < 576);

     while (j <= max_nonzero_coeff) {
         int const s =
             (cod_info->scalefac[sfb] + (cod_info->preflag ? pretab[sfb] : 0)) * ifqstep
             + cod_info->subblock_gain[cod_info->window[sfb]] * 8;
         uint8_t const sfac = (uint8_t) (cod_info->global_gain - s);
         FLOAT const sfpow34 = ipow20[sfac];
         unsigned int const w = (unsigned int) cod_info->width[sfb];
         unsigned int const m = (unsigned int) (max_nonzero_coeff - j + 1);
         unsigned int l = w;
         unsigned int remaining;

         assert((cod_info->global_gain - s) >= 0);
         assert(cod_info->width[sfb] >= 0);

         if (l > m) {
             l = m;
         }
         j += w;
         ++sfb;
         l >>= 1;
         remaining = (l & 1);

         for (l >>= 1; l > 0; --l) {
             x[0] = sfpow34 * xr34_orig[0];
             x[1] = sfpow34 * xr34_orig[1];
             x[2] = sfpow34 * xr34_orig[2];
             x[3] = sfpow34 * xr34_orig[3];

             k_34_4(x, l3);

             l3 += 4;
             xr34_orig += 4;
         }
         if (remaining) {
             x[0] = sfpow34 * xr34_orig[0];
             x[1] = sfpow34 * xr34_orig[1];

             k_34_2(x, l3);

             l3 += 2;
             xr34_orig += 2;
         }
     }
 }


 static const uint8_t max_range_short[SBMAX_s * 3] = {
     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
     0, 0, 0
 };

 static const uint8_t max_range_long[SBMAX_l] = {
     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0
 };

 static const uint8_t max_range_long_lsf_pretab[SBMAX_l] = {
     7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };


 /*
     sfb=0..5  scalefac < 16
     sfb>5     scalefac < 8

     ifqstep = ( cod_info->scalefac_scale == 0 ) ? 2 : 4;
     ol_sf =  (cod_info->global_gain-210.0);
     ol_sf -= 8*cod_info->subblock_gain[i];
     ol_sf -= ifqstep*scalefac[gr][ch].s[sfb][i];
 */

 static void
 set_subblock_gain(gr_info * cod_info, const int mingain_s[3], int sf[])
 {
     const int maxrange1 = 15, maxrange2 = 7;
     const int ifqstepShift = (cod_info->scalefac_scale == 0) ? 1 : 2;
     int    *const sbg = cod_info->subblock_gain;
     unsigned int const psymax = (unsigned int) cod_info->psymax;
     unsigned int psydiv = 18;
     int     sbg0, sbg1, sbg2;
     unsigned int sfb, i;
     int     min_sbg = 7;

     if (psydiv > psymax) {
         psydiv = psymax;
     }
     for (i = 0; i < 3; ++i) {
         int     maxsf1 = 0, maxsf2 = 0, minsf = 1000;
         /* see if we should use subblock gain */
         for (sfb = i; sfb < psydiv; sfb += 3) { /* part 1 */
             int const v = -sf[sfb];
             if (maxsf1 < v) {
                 maxsf1 = v;
             }
             if (minsf > v) {
                 minsf = v;
             }
         }
         for (; sfb < SFBMAX; sfb += 3) { /* part 2 */
             int const v = -sf[sfb];
             if (maxsf2 < v) {
                 maxsf2 = v;
             }
             if (minsf > v) {
                 minsf = v;
             }
         }

         /* boost subblock gain as little as possible so we can
          * reach maxsf1 with scalefactors
          * 8*sbg >= maxsf1
          */
         {
             int const m1 = maxsf1 - (maxrange1 << ifqstepShift);
             int const m2 = maxsf2 - (maxrange2 << ifqstepShift);

             maxsf1 = Max(m1, m2);
         }
         if (minsf > 0) {
             sbg[i] = minsf >> 3;
         }
         else {
             sbg[i] = 0;
         }
         if (maxsf1 > 0) {
             int const m1 = sbg[i];
             int const m2 = (maxsf1 + 7) >> 3;
             sbg[i] = Max(m1, m2);
         }
         if (sbg[i] > 0 && mingain_s[i] > (cod_info->global_gain - sbg[i] * 8)) {
             sbg[i] = (cod_info->global_gain - mingain_s[i]) >> 3;
         }
         if (sbg[i] > 7) {
             sbg[i] = 7;
         }
         if (min_sbg > sbg[i]) {
             min_sbg = sbg[i];
         }
     }
     sbg0 = sbg[0] * 8;
     sbg1 = sbg[1] * 8;
     sbg2 = sbg[2] * 8;
     for (sfb = 0; sfb < SFBMAX; sfb += 3) {
         sf[sfb + 0] += sbg0;
         sf[sfb + 1] += sbg1;
         sf[sfb + 2] += sbg2;
     }
     if (min_sbg > 0) {
         for (i = 0; i < 3; ++i) {
             sbg[i] -= min_sbg;
         }
         cod_info->global_gain -= min_sbg * 8;
     }
 }


 /*
 	  ifqstep = ( cod_info->scalefac_scale == 0 ) ? 2 : 4;
 	  ol_sf =  (cod_info->global_gain-210.0);
 	  ol_sf -= ifqstep*scalefac[gr][ch].l[sfb];
 	  if (cod_info->preflag && sfb>=11)
 	  ol_sf -= ifqstep*pretab[sfb];
 */
 static void
 set_scalefacs(gr_info * cod_info, const int *vbrsfmin, int sf[], const uint8_t * max_range)
 {
     const int ifqstep = (cod_info->scalefac_scale == 0) ? 2 : 4;
     const int ifqstepShift = (cod_info->scalefac_scale == 0) ? 1 : 2;
     int    *const scalefac = cod_info->scalefac;
     int const sfbmax = cod_info->sfbmax;
     int     sfb;
     int const *const sbg = cod_info->subblock_gain;
     int const *const window = cod_info->window;
     int const preflag = cod_info->preflag;

     if (preflag) {
         for (sfb = 11; sfb < sfbmax; ++sfb) {
             sf[sfb] += pretab[sfb] * ifqstep;
         }
     }
     for (sfb = 0; sfb < sfbmax; ++sfb) {
         int const gain = cod_info->global_gain - (sbg[window[sfb]] * 8)
             - ((preflag ? pretab[sfb] : 0) * ifqstep);

         if (sf[sfb] < 0) {
             int const m = gain - vbrsfmin[sfb];
             /* ifqstep*scalefac >= -sf[sfb], so round UP */
             scalefac[sfb] = (ifqstep - 1 - sf[sfb]) >> ifqstepShift;

             if (scalefac[sfb] > max_range[sfb]) {
                 scalefac[sfb] = max_range[sfb];
             }
             if (scalefac[sfb] > 0 && (scalefac[sfb] << ifqstepShift) > m) {
                 scalefac[sfb] = m >> ifqstepShift;
             }
         }
         else {
             scalefac[sfb] = 0;
         }
     }
     for (; sfb < SFBMAX; ++sfb) {
         scalefac[sfb] = 0; /* sfb21 */
     }
 }


 #ifndef NDEBUG
 static int
 checkScalefactor(const gr_info * cod_info, const int vbrsfmin[SFBMAX])
 {
     int const ifqstep = cod_info->scalefac_scale == 0 ? 2 : 4;
     int     sfb;
     for (sfb = 0; sfb < cod_info->psymax; ++sfb) {
         const int s =
             ((cod_info->scalefac[sfb] +
               (cod_info->preflag ? pretab[sfb] : 0)) * ifqstep) +
             cod_info->subblock_gain[cod_info->window[sfb]] * 8;

         if ((cod_info->global_gain - s) < vbrsfmin[sfb]) {
             /*
                fprintf( stdout, "sf %d\n", sfb );
                fprintf( stdout, "min %d\n", vbrsfmin[sfb] );
                fprintf( stdout, "ggain %d\n", cod_info->global_gain );
                fprintf( stdout, "scalefac %d\n", cod_info->scalefac[sfb] );
                fprintf( stdout, "pretab %d\n", (cod_info->preflag ? pretab[sfb] : 0) );
                fprintf( stdout, "scale %d\n", (cod_info->scalefac_scale + 1) );
                fprintf( stdout, "subgain %d\n", cod_info->subblock_gain[cod_info->window[sfb]] * 8 );
                fflush( stdout );
                exit(-1);
              */
             return 0;
         }
     }
     return 1;
 }
 #endif


 /******************************************************************
  *
  *  short block scalefacs
  *
  ******************************************************************/

 static void
 short_block_constrain(const algo_t * that, const int vbrsf[SFBMAX],
                       const int vbrsfmin[SFBMAX], int vbrmax)
 {
     gr_info *const cod_info = that->cod_info;
     lame_internal_flags const *const gfc = that->gfc;
     int const maxminsfb = that->mingain_l;
     int     mover, maxover0 = 0, maxover1 = 0, delta = 0;
     int     v, v0, v1;
     int     sfb;
     int const psymax = cod_info->psymax;

     for (sfb = 0; sfb < psymax; ++sfb) {
         assert(vbrsf[sfb] >= vbrsfmin[sfb]);
         v = vbrmax - vbrsf[sfb];
         if (delta < v) {
             delta = v;
         }
         v0 = v - (4 * 14 + 2 * max_range_short[sfb]);
         v1 = v - (4 * 14 + 4 * max_range_short[sfb]);
         if (maxover0 < v0) {
             maxover0 = v0;
         }
         if (maxover1 < v1) {
             maxover1 = v1;
         }
     }
     if (gfc->noise_shaping == 2) {
         /* allow scalefac_scale=1 */
         mover = Min(maxover0, maxover1);
     }
     else {
         mover = maxover0;
     }
     if (delta > mover) {
         delta = mover;
     }
     vbrmax -= delta;
     maxover0 -= mover;
     maxover1 -= mover;

     if (maxover0 == 0) {
         cod_info->scalefac_scale = 0;
     }
     else if (maxover1 == 0) {
         cod_info->scalefac_scale = 1;
     }
     if (vbrmax < maxminsfb) {
         vbrmax = maxminsfb;
     }
     cod_info->global_gain = vbrmax;

     if (cod_info->global_gain < 0) {
         cod_info->global_gain = 0;
     }
     else if (cod_info->global_gain > 255) {
         cod_info->global_gain = 255;
     }
     {
         int     sf_temp[SFBMAX];
         for (sfb = 0; sfb < SFBMAX; ++sfb) {
             sf_temp[sfb] = vbrsf[sfb] - vbrmax;
         }
         set_subblock_gain(cod_info, &that->mingain_s[0], sf_temp);
         set_scalefacs(cod_info, vbrsfmin, sf_temp, max_range_short);
     }
     assert(checkScalefactor(cod_info, vbrsfmin));
 }


 /******************************************************************
  *
  *  long block scalefacs
  *
  ******************************************************************/

 static void
 long_block_constrain(const algo_t * that, const int vbrsf[SFBMAX], const int vbrsfmin[SFBMAX],
                      int vbrmax)
 {
     gr_info *const cod_info = that->cod_info;
     lame_internal_flags const *const gfc = that->gfc;
     uint8_t const *max_rangep;
     int const maxminsfb = that->mingain_l;
     int     sfb;
     int     maxover0, maxover1, maxover0p, maxover1p, mover, delta = 0;
     int     v, v0, v1, v0p, v1p, vm0p = 1, vm1p = 1;
     int const psymax = cod_info->psymax;

     max_rangep = gfc->mode_gr == 2 ? max_range_long : max_range_long_lsf_pretab;

     maxover0 = 0;
     maxover1 = 0;
     maxover0p = 0;      /* pretab */
     maxover1p = 0;      /* pretab */

     for (sfb = 0; sfb < psymax; ++sfb) {
         assert(vbrsf[sfb] >= vbrsfmin[sfb]);
         v = vbrmax - vbrsf[sfb];
         if (delta < v) {
             delta = v;
         }
         v0 = v - 2 * max_range_long[sfb];
         v1 = v - 4 * max_range_long[sfb];
         v0p = v - 2 * (max_rangep[sfb] + pretab[sfb]);
         v1p = v - 4 * (max_rangep[sfb] + pretab[sfb]);
         if (maxover0 < v0) {
             maxover0 = v0;
         }
         if (maxover1 < v1) {
             maxover1 = v1;
         }
         if (maxover0p < v0p) {
             maxover0p = v0p;
         }
         if (maxover1p < v1p) {
             maxover1p = v1p;
         }
     }
     if (vm0p == 1) {
         int     gain = vbrmax - maxover0p;
         if (gain < maxminsfb) {
             gain = maxminsfb;
         }
         for (sfb = 0; sfb < psymax; ++sfb) {
             int const a = (gain - vbrsfmin[sfb]) - 2 * pretab[sfb];
             if (a <= 0) {
                 vm0p = 0;
                 vm1p = 0;
                 break;
             }
         }
     }
     if (vm1p == 1) {
         int     gain = vbrmax - maxover1p;
         if (gain < maxminsfb) {
             gain = maxminsfb;
         }
         for (sfb = 0; sfb < psymax; ++sfb) {
             int const b = (gain - vbrsfmin[sfb]) - 4 * pretab[sfb];
             if (b <= 0) {
                 vm1p = 0;
                 break;
             }
         }
     }
     if (vm0p == 0) {
         maxover0p = maxover0;
     }
     if (vm1p == 0) {
         maxover1p = maxover1;
     }
     if (gfc->noise_shaping != 2) {
         maxover1 = maxover0;
         maxover1p = maxover0p;
     }
     mover = Min(maxover0, maxover0p);
     mover = Min(mover, maxover1);
     mover = Min(mover, maxover1p);

     if (delta > mover) {
         delta = mover;
     }
     vbrmax -= delta;
     if (vbrmax < maxminsfb) {
         vbrmax = maxminsfb;
     }
     maxover0 -= mover;
     maxover0p -= mover;
     maxover1 -= mover;
     maxover1p -= mover;

     if (maxover0 == 0) {
         cod_info->scalefac_scale = 0;
         cod_info->preflag = 0;
         max_rangep = max_range_long;
     }
     else if (maxover0p == 0) {
         cod_info->scalefac_scale = 0;
         cod_info->preflag = 1;
     }
     else if (maxover1 == 0) {
         cod_info->scalefac_scale = 1;
         cod_info->preflag = 0;
         max_rangep = max_range_long;
     }
     else if (maxover1p == 0) {
         cod_info->scalefac_scale = 1;
         cod_info->preflag = 1;
     }
     else {
         assert(0);      /* this should not happen */
     }
     cod_info->global_gain = vbrmax;
     if (cod_info->global_gain < 0) {
         cod_info->global_gain = 0;
     }
     else if (cod_info->global_gain > 255) {
         cod_info->global_gain = 255;
     }
     {
         int     sf_temp[SFBMAX];
         for (sfb = 0; sfb < SFBMAX; ++sfb) {
             sf_temp[sfb] = vbrsf[sfb] - vbrmax;
         }
         set_scalefacs(cod_info, vbrsfmin, sf_temp, max_rangep);
     }
     assert(checkScalefactor(cod_info, vbrsfmin));
 }


 static void
 bitcount(const algo_t * that)
 {
     int     rc;

     if (that->gfc->mode_gr == 2) {
         rc = scale_bitcount(that->cod_info);
     }
     else {
         rc = scale_bitcount_lsf(that->gfc, that->cod_info);
     }
     if (rc == 0) {
         return;
     }
     /*  this should not happen due to the way the scalefactors are selected  */
     ERRORF(that->gfc, "INTERNAL ERROR IN VBR NEW CODE (986), please send bug report\n");
     exit(-1);
 }


 static int
 quantizeAndCountBits(const algo_t * that)
 {
     quantize_x34(that);
     that->cod_info->part2_3_length = noquant_count_bits(that->gfc, that->cod_info, 0);
     return that->cod_info->part2_3_length;
 }


 static int
 tryGlobalStepsize(const algo_t * that, const int sfwork[SFBMAX],
                   const int vbrsfmin[SFBMAX], int delta)
 {
     FLOAT const xrpow_max = that->cod_info->xrpow_max;
     int     sftemp[SFBMAX], i, nbits;
     int     gain, vbrmax = 0;
     for (i = 0; i < SFBMAX; ++i) {
         gain = sfwork[i] + delta;
         if (gain < vbrsfmin[i]) {
             gain = vbrsfmin[i];
         }
         if (gain > 255) {
             gain = 255;
         }
         if (vbrmax < gain) {
             vbrmax = gain;
         }
         sftemp[i] = gain;
     }
     that->alloc(that, sftemp, vbrsfmin, vbrmax);
     bitcount(that);
     nbits = quantizeAndCountBits(that);
     that->cod_info->xrpow_max = xrpow_max;
     return nbits;
 }


 static void
 searchGlobalStepsizeMax(const algo_t * that, const int sfwork[SFBMAX],
                         const int vbrsfmin[SFBMAX], int target)
 {
     gr_info const *const cod_info = that->cod_info;
     const int gain = cod_info->global_gain;
     int     curr = gain;
     int     gain_ok = 1024;
     int     nbits = LARGE_BITS;
     int     l = gain, r = 512;

     assert(gain >= 0);
     while (l <= r) {
         curr = (l + r) >> 1;
         nbits = tryGlobalStepsize(that, sfwork, vbrsfmin, curr - gain);
         if (nbits == 0 || (nbits + cod_info->part2_length) < target) {
             r = curr - 1;
             gain_ok = curr;
         }
         else {
             l = curr + 1;
             if (gain_ok == 1024) {
                 gain_ok = curr;
             }
         }
     }
     if (gain_ok != curr) {
         curr = gain_ok;
         nbits = tryGlobalStepsize(that, sfwork, vbrsfmin, curr - gain);
     }
 }


 static int
 sfDepth(const int sfwork[SFBMAX])
 {
     int     m = 0;
     unsigned int i, j;
     for (j = SFBMAX, i = 0; j > 0; --j, ++i) {
         int const di = 255 - sfwork[i];
         if (m < di) {
             m = di;
         }
         assert(sfwork[i] >= 0);
         assert(sfwork[i] <= 255);
     }
     assert(m >= 0);
     assert(m <= 255);
     return m;
 }


 static void
 cutDistribution(const int sfwork[SFBMAX], int sf_out[SFBMAX], int cut)
 {
     unsigned int i, j;
     for (j = SFBMAX, i = 0; j > 0; --j, ++i) {
         int const x = sfwork[i];
         sf_out[i] = x < cut ? x : cut;
     }
 }


 static int
 flattenDistribution(const int sfwork[SFBMAX], int sf_out[SFBMAX], int dm, int k, int p)
 {
     unsigned int i, j;
     int     x, sfmax = 0;
     if (dm > 0) {
         for (j = SFBMAX, i = 0; j > 0; --j, ++i) {
             int const di = p - sfwork[i];
             x = sfwork[i] + (k * di) / dm;
             if (x < 0) {
                 x = 0;
             }
             else {
                 if (x > 255) {
                     x = 255;
                 }
             }
             sf_out[i] = x;
             if (sfmax < x) {
                 sfmax = x;
             }
         }
     }
     else {
         for (j = SFBMAX, i = 0; j > 0; --j, ++i) {
             x = sfwork[i];
             sf_out[i] = x;
             if (sfmax < x) {
                 sfmax = x;
             }
         }
     }
     return sfmax;
 }


 static int
 tryThatOne(algo_t * that, int sftemp[SFBMAX], const int vbrsfmin[SFBMAX], int vbrmax)
 {
     FLOAT const xrpow_max = that->cod_info->xrpow_max;
     int     nbits = LARGE_BITS;
     that->alloc(that, sftemp, vbrsfmin, vbrmax);
     bitcount(that);
     nbits = quantizeAndCountBits(that);
     nbits += that->cod_info->part2_length;
     that->cod_info->xrpow_max = xrpow_max;
     return nbits;
 }


 static void
 outOfBitsStrategy(algo_t * that, const int sfwork[SFBMAX], const int vbrsfmin[SFBMAX], int target)
 {
     int     wrk[SFBMAX];
     int const dm = sfDepth(sfwork);
     int const p = that->cod_info->global_gain;
     int     nbits;

     /* PART 1 */
     {
         int     bi = dm / 2;
         int     bi_ok = -1;
         int     bu = 0;
         int     bo = dm;
         for (;;) {
             int const sfmax = flattenDistribution(sfwork, wrk, dm, bi, p);
             nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
             if (nbits <= target) {
                 bi_ok = bi;
                 bo = bi - 1;
             }
             else {
                 bu = bi + 1;
             }
             if (bu <= bo) {
                 bi = (bu + bo) / 2;
             }
             else {
                 break;
             }
         }
         if (bi_ok >= 0) {
             if (bi != bi_ok) {
                 int const sfmax = flattenDistribution(sfwork, wrk, dm, bi_ok, p);
                 nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
             }
             return;
         }
     }

     /* PART 2: */
     {
         int     bi = (255 + p) / 2;
         int     bi_ok = -1;
         int     bu = p;
         int     bo = 255;
         for (;;) {
             int const sfmax = flattenDistribution(sfwork, wrk, dm, dm, bi);
             nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
             if (nbits <= target) {
                 bi_ok = bi;
                 bo = bi - 1;
             }
             else {
                 bu = bi + 1;
             }
             if (bu <= bo) {
                 bi = (bu + bo) / 2;
             }
             else {
                 break;
             }
         }
         if (bi_ok >= 0) {
             if (bi != bi_ok) {
                 int const sfmax = flattenDistribution(sfwork, wrk, dm, dm, bi_ok);
                 nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
             }
             return;
         }
     }

     /* fall back to old code, likely to be never called */
     searchGlobalStepsizeMax(that, wrk, vbrsfmin, target);
 }


 static int
 reduce_bit_usage(lame_internal_flags * gfc, int gr, int ch
 #if 0
                  , const FLOAT xr34orig[576], const FLOAT l3_xmin[SFBMAX], int maxbits
 #endif
     )
 {
     gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
     /*  try some better scalefac storage
      */
     best_scalefac_store(gfc, gr, ch, &gfc->l3_side);

     /*  best huffman_divide may save some bits too
      */
     if (gfc->use_best_huffman == 1)
         best_huffman_divide(gfc, cod_info);
 #if 0
     /* truncate small spectrum seems to introduce pops, disabled(RH 050918) */
     if (gfc->substep_shaping & 1) {
         trancate_smallspectrums(gfc, cod_info, l3_xmin, xr34orig);
     }
     else if (cod_info->part2_3_length > maxbits - cod_info->part2_length) {
         trancate_smallspectrums(gfc, cod_info, l3_xmin, xr34orig);
     }
 #endif
     return cod_info->part2_3_length + cod_info->part2_length;
 }


 int
 VBR_encode_frame(lame_internal_flags * gfc, FLOAT xr34orig[2][2][576],
                  FLOAT l3_xmin[2][2][SFBMAX], int max_bits[2][2])
 {
     int     sfwork_[2][2][SFBMAX];
     int     vbrsfmin_[2][2][SFBMAX];
     algo_t  that_[2][2];
     int const ngr = gfc->mode_gr;
     int const nch = gfc->channels_out;
     int     max_nbits_ch[2][2];
     int     max_nbits_gr[2];
     int     max_nbits_fr = 0;
     int     use_nbits_ch[2][2];
     int     use_nbits_gr[2];
     int     use_nbits_fr = 0;
     int     gr, ch;
     int     ok, sum_fr;

     /* set up some encoding parameters
      */
     for (gr = 0; gr < ngr; ++gr) {
         max_nbits_gr[gr] = 0;
         for (ch = 0; ch < nch; ++ch) {
             max_nbits_ch[gr][ch] = max_bits[gr][ch];
             use_nbits_ch[gr][ch] = 0;
             max_nbits_gr[gr] += max_bits[gr][ch];
             max_nbits_fr += max_bits[gr][ch];
             that_[gr][ch].gfc = gfc;
             that_[gr][ch].cod_info = &gfc->l3_side.tt[gr][ch];
             that_[gr][ch].xr34orig = xr34orig[gr][ch];
             if (that_[gr][ch].cod_info->block_type == SHORT_TYPE) {
                 that_[gr][ch].alloc = short_block_constrain;
             }
             else {
                 that_[gr][ch].alloc = long_block_constrain;
             }
         }               /* for ch */
     }

     /* searches scalefactors
      */
     for (gr = 0; gr < ngr; ++gr) {
         for (ch = 0; ch < nch; ++ch) {
             if (max_bits[gr][ch] > 0) {
                 algo_t *that = &that_[gr][ch];
                 int    *sfwork = sfwork_[gr][ch];
                 int    *vbrsfmin = vbrsfmin_[gr][ch];
                 int     vbrmax;

                 vbrmax = block_sf(that, l3_xmin[gr][ch], sfwork, vbrsfmin);
                 that->alloc(that, sfwork, vbrsfmin, vbrmax);
                 bitcount(that);
             }
             else {
                 /*  xr contains no energy
                  *  l3_enc, our encoding data, will be quantized to zero
                  *  continue with next channel
                  */
             }
         }               /* for ch */
     }

     /* encode 'as is'
      */
     use_nbits_fr = 0;
     for (gr = 0; gr < ngr; ++gr) {
         use_nbits_gr[gr] = 0;
         for (ch = 0; ch < nch; ++ch) {
             algo_t *that = &that_[gr][ch];
             if (max_bits[gr][ch] > 0) {
                 unsigned int const max_nonzero_coeff =
                     (unsigned int) that->cod_info->max_nonzero_coeff;

                 assert(max_nonzero_coeff < 576);
                 memset(&that->cod_info->l3_enc[max_nonzero_coeff], 0,
                        (576u - max_nonzero_coeff) * sizeof(that->cod_info->l3_enc[0]));

                 (void) quantizeAndCountBits(that);
             }
             else {
                 /*  xr contains no energy
                  *  l3_enc, our encoding data, will be quantized to zero
                  *  continue with next channel
                  */
             }
             use_nbits_ch[gr][ch] = reduce_bit_usage(gfc, gr, ch);
             use_nbits_gr[gr] += use_nbits_ch[gr][ch];
         }               /* for ch */
         use_nbits_fr += use_nbits_gr[gr];
     }

     /* check bit constrains
      */
     if (use_nbits_fr <= max_nbits_fr) {
         ok = 1;
         for (gr = 0; gr < ngr; ++gr) {
             if (use_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
                 /* violates the rule that every granule has to use no more
                  * bits than MAX_BITS_PER_GRANULE
                  */
                 ok = 0;
             }
             for (ch = 0; ch < nch; ++ch) {
                 if (use_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
                     /* violates the rule that every gr_ch has to use no more
                      * bits than MAX_BITS_PER_CHANNEL
                      *
                      * This isn't explicitly stated in the ISO docs, but the
                      * part2_3_length field has only 12 bits, that makes it
                      * up to a maximum size of 4095 bits!!!
                      */
                     ok = 0;
                 }
             }
         }
         if (ok) {
             return use_nbits_fr;
         }
     }

     /* OK, we are in trouble and have to define how many bits are
      * to be used for each granule
      */
     {
         ok = 1;
         sum_fr = 0;

         for (gr = 0; gr < ngr; ++gr) {
             max_nbits_gr[gr] = 0;
             for (ch = 0; ch < nch; ++ch) {
                 if (use_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
                     max_nbits_ch[gr][ch] = MAX_BITS_PER_CHANNEL;
                 }
                 else {
                     max_nbits_ch[gr][ch] = use_nbits_ch[gr][ch];
                 }
                 max_nbits_gr[gr] += max_nbits_ch[gr][ch];
             }
             if (max_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
                 float   f[2], s = 0;
                 for (ch = 0; ch < nch; ++ch) {
                     if (max_nbits_ch[gr][ch] > 0) {
                         f[ch] = sqrt(sqrt(max_nbits_ch[gr][ch]));
                         s += f[ch];
                     }
                     else {
                         f[ch] = 0;
                     }
                 }
                 for (ch = 0; ch < nch; ++ch) {
                     if (s > 0) {
                         max_nbits_ch[gr][ch] = MAX_BITS_PER_GRANULE * f[ch] / s;
                     }
                     else {
                         max_nbits_ch[gr][ch] = 0;
                     }
                 }
                 if (nch > 1) {
                     if (max_nbits_ch[gr][0] > use_nbits_ch[gr][0] + 32) {
                         max_nbits_ch[gr][1] += max_nbits_ch[gr][0];
                         max_nbits_ch[gr][1] -= use_nbits_ch[gr][0] + 32;
                         max_nbits_ch[gr][0] = use_nbits_ch[gr][0] + 32;
                     }
                     if (max_nbits_ch[gr][1] > use_nbits_ch[gr][1] + 32) {
                         max_nbits_ch[gr][0] += max_nbits_ch[gr][1];
                         max_nbits_ch[gr][0] -= use_nbits_ch[gr][1] + 32;
                         max_nbits_ch[gr][1] = use_nbits_ch[gr][1] + 32;
                     }
                     if (max_nbits_ch[gr][0] > MAX_BITS_PER_CHANNEL) {
                         max_nbits_ch[gr][0] = MAX_BITS_PER_CHANNEL;
                     }
                     if (max_nbits_ch[gr][1] > MAX_BITS_PER_CHANNEL) {
                         max_nbits_ch[gr][1] = MAX_BITS_PER_CHANNEL;
                     }
                 }
                 max_nbits_gr[gr] = 0;
                 for (ch = 0; ch < nch; ++ch) {
                     max_nbits_gr[gr] += max_nbits_ch[gr][ch];
                 }
             }
             sum_fr += max_nbits_gr[gr];
         }
         if (sum_fr > max_nbits_fr) {
             {
                 float   f[2], s = 0;
                 for (gr = 0; gr < ngr; ++gr) {
                     if (max_nbits_gr[gr] > 0) {
                         f[gr] = sqrt(max_nbits_gr[gr]);
                         s += f[gr];
                     }
                     else {
                         f[gr] = 0;
                     }
                 }
                 for (gr = 0; gr < ngr; ++gr) {
                     if (s > 0) {
                         max_nbits_gr[gr] = max_nbits_fr * f[gr] / s;
                     }
                     else {
                         max_nbits_gr[gr] = 0;
                     }
                 }
             }
             if (ngr > 1) {
                 if (max_nbits_gr[0] > use_nbits_gr[0] + 125) {
                     max_nbits_gr[1] += max_nbits_gr[0];
                     max_nbits_gr[1] -= use_nbits_gr[0] + 125;
                     max_nbits_gr[0] = use_nbits_gr[0] + 125;
                 }
                 if (max_nbits_gr[1] > use_nbits_gr[1] + 125) {
                     max_nbits_gr[0] += max_nbits_gr[1];
                     max_nbits_gr[0] -= use_nbits_gr[1] + 125;
                     max_nbits_gr[1] = use_nbits_gr[1] + 125;
                 }
                 for (gr = 0; gr < ngr; ++gr) {
                     if (max_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
                         max_nbits_gr[gr] = MAX_BITS_PER_GRANULE;
                     }
                 }
             }
             for (gr = 0; gr < ngr; ++gr) {
                 float   f[2], s = 0;
                 for (ch = 0; ch < nch; ++ch) {
                     if (max_nbits_ch[gr][ch] > 0) {
                         f[ch] = sqrt(max_nbits_ch[gr][ch]);
                         s += f[ch];
                     }
                     else {
                         f[ch] = 0;
                     }
                 }
                 for (ch = 0; ch < nch; ++ch) {
                     if (s > 0) {
                         max_nbits_ch[gr][ch] = max_nbits_gr[gr] * f[ch] / s;
                     }
                     else {
                         max_nbits_ch[gr][ch] = 0;
                     }
                 }
                 if (nch > 1) {
                     if (max_nbits_ch[gr][0] > use_nbits_ch[gr][0] + 32) {
                         max_nbits_ch[gr][1] += max_nbits_ch[gr][0];
                         max_nbits_ch[gr][1] -= use_nbits_ch[gr][0] + 32;
                         max_nbits_ch[gr][0] = use_nbits_ch[gr][0] + 32;
                     }
                     if (max_nbits_ch[gr][1] > use_nbits_ch[gr][1] + 32) {
                         max_nbits_ch[gr][0] += max_nbits_ch[gr][1];
                         max_nbits_ch[gr][0] -= use_nbits_ch[gr][1] + 32;
                         max_nbits_ch[gr][1] = use_nbits_ch[gr][1] + 32;
                     }
                     for (ch = 0; ch < nch; ++ch) {
                         if (max_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
                             max_nbits_ch[gr][ch] = MAX_BITS_PER_CHANNEL;
                         }
                     }
                 }
             }
         }
         /* sanity check */
         sum_fr = 0;
         for (gr = 0; gr < ngr; ++gr) {
             int     sum_gr = 0;
             for (ch = 0; ch < nch; ++ch) {
                 sum_gr += max_nbits_ch[gr][ch];
                 if (max_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
                     ok = 0;
                 }
             }
             sum_fr += sum_gr;
             if (sum_gr > MAX_BITS_PER_GRANULE) {
                 ok = 0;
             }
         }
         if (sum_fr > max_nbits_fr) {
             ok = 0;
         }
         if (!ok) {
             /* we must have done something wrong, fallback to 'on_pe' based constrain */
             for (gr = 0; gr < ngr; ++gr) {
                 for (ch = 0; ch < nch; ++ch) {
                     max_nbits_ch[gr][ch] = max_bits[gr][ch];
                 }
             }
         }
     }

     /* we already called the 'best_scalefac_store' function, so we need to reset some
      * variables before we can do it again.
      */
     for (ch = 0; ch < nch; ++ch) {
         gfc->l3_side.scfsi[ch][0] = 0;
         gfc->l3_side.scfsi[ch][1] = 0;
         gfc->l3_side.scfsi[ch][2] = 0;
         gfc->l3_side.scfsi[ch][3] = 0;
     }
     for (gr = 0; gr < ngr; ++gr) {
         for (ch = 0; ch < nch; ++ch) {
             gfc->l3_side.tt[gr][ch].scalefac_compress = 0;
         }
     }

     /* alter our encoded data, until it fits into the target bitrate
      */
     use_nbits_fr = 0;
     for (gr = 0; gr < ngr; ++gr) {
         use_nbits_gr[gr] = 0;
         for (ch = 0; ch < nch; ++ch) {
             algo_t *that = &that_[gr][ch];
             use_nbits_ch[gr][ch] = 0;
             if (max_bits[gr][ch] > 0) {
                 int    *sfwork = sfwork_[gr][ch];
                 int    *vbrsfmin = vbrsfmin_[gr][ch];
                 cutDistribution(sfwork, sfwork, that->cod_info->global_gain);
                 outOfBitsStrategy(that, sfwork, vbrsfmin, max_nbits_ch[gr][ch]);
             }
             use_nbits_ch[gr][ch] = reduce_bit_usage(gfc, gr, ch);
             assert(use_nbits_ch[gr][ch] <= max_nbits_ch[gr][ch]);
             use_nbits_gr[gr] += use_nbits_ch[gr][ch];
         }               /* for ch */
         use_nbits_fr += use_nbits_gr[gr];
     }

     /* check bit constrains, but it should always be ok, iff there are no bugs ;-)
      */
     if (use_nbits_fr <= max_nbits_fr) {
         return use_nbits_fr;
     }

     ERRORF(gfc, "INTERNAL ERROR IN VBR NEW CODE (1313), please send bug report\n"
            "maxbits=%d usedbits=%d\n", max_nbits_fr, use_nbits_fr);
     exit(-1);
 }