libvpx/test/vp9_quantize_test.cc - platform/external/libvpx - Git at Google

 /*
  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vp9_rtcd.h"
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/vpx_timer.h"

 using libvpx_test::ACMRandom;
 using libvpx_test::Buffer;

 namespace {
 const int number_of_iterations = 100;

 typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
                              int skip_block, const int16_t *zbin,
                              const int16_t *round, const int16_t *quant,
                              const int16_t *quant_shift, tran_low_t *qcoeff,
                              tran_low_t *dqcoeff, const int16_t *dequant,
                              uint16_t *eob, const int16_t *scan,
                              const int16_t *iscan);
 typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
                         int /*max_size*/, bool /*is_fp*/>
     QuantizeParam;

 // Wrapper for FP version which does not use zbin or quant_shift.
 typedef void (*QuantizeFPFunc)(const tran_low_t *coeff, intptr_t count,
                                int skip_block, const int16_t *round,
                                const int16_t *quant, tran_low_t *qcoeff,
                                tran_low_t *dqcoeff, const int16_t *dequant,
                                uint16_t *eob, const int16_t *scan,
                                const int16_t *iscan);

 template <QuantizeFPFunc fn>
 void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
                     const int16_t *zbin, const int16_t *round,
                     const int16_t *quant, const int16_t *quant_shift,
                     tran_low_t *qcoeff, tran_low_t *dqcoeff,
                     const int16_t *dequant, uint16_t *eob, const int16_t *scan,
                     const int16_t *iscan) {
   (void)zbin;
   (void)quant_shift;

   fn(coeff, count, skip_block, round, quant, qcoeff, dqcoeff, dequant, eob,
      scan, iscan);
 }

 class VP9QuantizeBase {
  public:
   VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
       : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
     max_value_ = (1 << bit_depth_) - 1;
     zbin_ptr_ =
         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
     round_fp_ptr_ = reinterpret_cast<int16_t *>(
         vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
     quant_fp_ptr_ = reinterpret_cast<int16_t *>(
         vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
     round_ptr_ =
         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
     quant_ptr_ =
         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*quant_ptr_)));
     quant_shift_ptr_ = reinterpret_cast<int16_t *>(
         vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
     dequant_ptr_ = reinterpret_cast<int16_t *>(
         vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
   }

   ~VP9QuantizeBase() {
     vpx_free(zbin_ptr_);
     vpx_free(round_fp_ptr_);
     vpx_free(quant_fp_ptr_);
     vpx_free(round_ptr_);
     vpx_free(quant_ptr_);
     vpx_free(quant_shift_ptr_);
     vpx_free(dequant_ptr_);
     zbin_ptr_ = NULL;
     round_fp_ptr_ = NULL;
     quant_fp_ptr_ = NULL;
     round_ptr_ = NULL;
     quant_ptr_ = NULL;
     quant_shift_ptr_ = NULL;
     dequant_ptr_ = NULL;
     libvpx_test::ClearSystemState();
   }

  protected:
   int16_t *zbin_ptr_;
   int16_t *round_fp_ptr_;
   int16_t *quant_fp_ptr_;
   int16_t *round_ptr_;
   int16_t *quant_ptr_;
   int16_t *quant_shift_ptr_;
   int16_t *dequant_ptr_;
   const vpx_bit_depth_t bit_depth_;
   int max_value_;
   const int max_size_;
   const bool is_fp_;
 };

 class VP9QuantizeTest : public VP9QuantizeBase,
                         public ::testing::TestWithParam<QuantizeParam> {
  public:
   VP9QuantizeTest()
       : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
         quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}

  protected:
   const QuantizeFunc quantize_op_;
   const QuantizeFunc ref_quantize_op_;
 };

 // This quantizer compares the AC coefficients to the quantization step size to
 // determine if further multiplication operations are needed.
 // Based on vp9_quantize_fp_sse2().
 void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                       int skip_block, const int16_t *round_ptr,
                       const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                       tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                       uint16_t *eob_ptr, const int16_t *scan,
                       const int16_t *iscan) {
   int i, eob = -1;
   const int thr = dequant_ptr[1] >> 1;
   (void)iscan;
   (void)skip_block;
   assert(!skip_block);

   // Quantization pass: All coefficients with index >= zero_flag are
   // skippable. Note: zero_flag can be zero.
   for (i = 0; i < n_coeffs; i += 16) {
     int y;
     int nzflag_cnt = 0;
     int abs_coeff[16];
     int coeff_sign[16];

     // count nzflag for each row (16 tran_low_t)
     for (y = 0; y < 16; ++y) {
       const int rc = i + y;
       const int coeff = coeff_ptr[rc];
       coeff_sign[y] = (coeff >> 31);
       abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
       // The first 16 are skipped in the sse2 code.  Do the same here to match.
       if (i >= 16 && (abs_coeff[y] <= thr)) {
         nzflag_cnt++;
       }
     }

     for (y = 0; y < 16; ++y) {
       const int rc = i + y;
       // If all of the AC coeffs in a row has magnitude less than the
       // quantization step_size/2, quantize to zero.
       if (nzflag_cnt < 16) {
         int tmp =
             clamp(abs_coeff[y] + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
         tmp = (tmp * quant_ptr[rc != 0]) >> 16;
         qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
         dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
       } else {
         qcoeff_ptr[rc] = 0;
         dqcoeff_ptr[rc] = 0;
       }
     }
   }

   // Scan for eob.
   for (i = 0; i < n_coeffs; i++) {
     // Use the scan order to find the correct eob.
     const int rc = scan[i];
     if (qcoeff_ptr[rc]) {
       eob = i;
     }
   }
   *eob_ptr = eob + 1;
 }

 void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
                           int16_t *quant, int16_t *quant_shift,
                           int16_t *dequant, int16_t *round_fp,
                           int16_t *quant_fp) {
   // Max when q == 0.  Otherwise, it is 48 for Y and 42 for U/V.
   const int max_qrounding_factor_fp = 64;

   for (int j = 0; j < 2; j++) {
     // The range is 4 to 1828 in the VP9 tables.
     const int qlookup = rnd->RandRange(1825) + 4;
     round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
     quant_fp[j] = (1 << 16) / qlookup;

     // Values determined by deconstructing vp9_init_quantizer().
     // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
     // values or U/V values of any bit depth. This is because y_delta is not
     // factored into the vp9_ac_quant() call.
     zbin[j] = rnd->RandRange(1200);

     // round may be up to 685 for Y values or 914 for U/V.
     round[j] = rnd->RandRange(914);
     // quant ranges from 1 to -32703
     quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
     // quant_shift goes up to 1 << 16.
     quant_shift[j] = rnd->RandRange(16384);
     // dequant maxes out at 1828 for all cases.
     dequant[j] = rnd->RandRange(1828);
   }
   for (int j = 2; j < 8; j++) {
     zbin[j] = zbin[1];
     round_fp[j] = round_fp[1];
     quant_fp[j] = quant_fp[1];
     round[j] = round[1];
     quant[j] = quant[1];
     quant_shift[j] = quant_shift[1];
     dequant[j] = dequant[1];
   }
 }

 TEST_P(VP9QuantizeTest, OperationCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
   ASSERT_TRUE(coeff.Init());
   Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(qcoeff.Init());
   Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(dqcoeff.Init());
   Buffer<tran_low_t> ref_qcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_qcoeff.Init());
   Buffer<tran_low_t> ref_dqcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_dqcoeff.Init());
   uint16_t eob, ref_eob;

   for (int i = 0; i < number_of_iterations; ++i) {
     // Test skip block for the first three iterations to catch all the different
     // sizes.
     const int skip_block = 0;
     TX_SIZE sz;
     if (max_size_ == 16) {
       sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
     } else {
       sz = TX_32X32;
     }
     const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
     const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
     const int count = (4 << sz) * (4 << sz);
     coeff.Set(&rnd, -max_value_, max_value_);
     GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                          quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                          quant_fp_ptr_);
     int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
     int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
     ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                      q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
                      ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
                      scan_order->scan, scan_order->iscan);

     ASM_REGISTER_STATE_CHECK(quantize_op_(
         coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
         quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
         dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));

     EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
     EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));

     EXPECT_EQ(eob, ref_eob);

     if (HasFailure()) {
       printf("Failure on iteration %d.\n", i);
       qcoeff.PrintDifference(ref_qcoeff);
       dqcoeff.PrintDifference(ref_dqcoeff);
       return;
     }
   }
 }

 TEST_P(VP9QuantizeTest, EOBCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
   ASSERT_TRUE(coeff.Init());
   Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(qcoeff.Init());
   Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(dqcoeff.Init());
   Buffer<tran_low_t> ref_qcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_qcoeff.Init());
   Buffer<tran_low_t> ref_dqcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_dqcoeff.Init());
   uint16_t eob, ref_eob;

   for (int i = 0; i < number_of_iterations; ++i) {
     const int skip_block = 0;
     TX_SIZE sz;
     if (max_size_ == 16) {
       sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
     } else {
       sz = TX_32X32;
     }
     const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
     const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
     int count = (4 << sz) * (4 << sz);
     // Two random entries
     coeff.Set(0);
     coeff.TopLeftPixel()[rnd(count)] =
         static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
     coeff.TopLeftPixel()[rnd(count)] =
         static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
     GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                          quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                          quant_fp_ptr_);
     int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
     int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
     ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                      q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
                      ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
                      scan_order->scan, scan_order->iscan);

     ASM_REGISTER_STATE_CHECK(quantize_op_(
         coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
         quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
         dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));

     EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
     EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));

     EXPECT_EQ(eob, ref_eob);

     if (HasFailure()) {
       printf("Failure on iteration %d.\n", i);
       qcoeff.PrintDifference(ref_qcoeff);
       dqcoeff.PrintDifference(ref_dqcoeff);
       return;
     }
   }
 }

 TEST_P(VP9QuantizeTest, DISABLED_Speed) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
   ASSERT_TRUE(coeff.Init());
   Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(qcoeff.Init());
   Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(dqcoeff.Init());
   uint16_t eob;
   TX_SIZE starting_sz, ending_sz;

   if (max_size_ == 16) {
     starting_sz = TX_4X4;
     ending_sz = TX_16X16;
   } else {
     starting_sz = TX_32X32;
     ending_sz = TX_32X32;
   }

   for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
     // zbin > coeff, zbin < coeff.
     for (int i = 0; i < 2; ++i) {
       const int skip_block = 0;
       // TX_TYPE defines the scan order. That is not relevant to the speed test.
       // Pick the first one.
       const TX_TYPE tx_type = DCT_DCT;
       const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
       const int count = (4 << sz) * (4 << sz);

       GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                            quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                            quant_fp_ptr_);
       int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
       int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;

       if (i == 0) {
         // When |coeff values| are less than zbin the results are 0.
         int threshold = 100;
         if (max_size_ == 32) {
           // For 32x32, the threshold is halved. Double it to keep the values
           // from clearing it.
           threshold = 200;
         }
         for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
         coeff.Set(&rnd, -99, 99);
       } else if (i == 1) {
         for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
         coeff.Set(&rnd, -500, 500);
       }

       vpx_usec_timer timer;
       vpx_usec_timer_start(&timer);
       for (int j = 0; j < 100000000 / count; ++j) {
         quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                      q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
                      dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
                      scan_order->scan, scan_order->iscan);
       }
       vpx_usec_timer_mark(&timer);
       const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
       if (i == 0) printf("Bypass calculations.\n");
       if (i == 1) printf("Full calculations.\n");
       printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
              elapsed_time / 1000);
     }
     printf("\n");
   }
 }

 using std::tr1::make_tuple;

 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 // TODO(johannkoenig): Fix vpx_quantize_b_sse2 in highbitdepth builds.
 // make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_8),
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9QuantizeTest,
     ::testing::Values(
         make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                    VPX_BITS_8, 16, false),
         make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                    VPX_BITS_10, 16, false),
         make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                    VPX_BITS_12, 16, false),
         make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
                    &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
         make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
                    &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
         make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
                    &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));

 #else
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9QuantizeTest,
     ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
                                  VPX_BITS_8, 16, false),
                       make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
                                  &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                  16, true)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2

 #if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
 #if ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9QuantizeTest,
     ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
                                  VPX_BITS_8, 16, false),
                       make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
                                  &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                  16, true)));
 #else
 INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
                         ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
                                                      &vpx_quantize_b_c,
                                                      VPX_BITS_8, 16, false)));
 #endif

 #if ARCH_X86_64
 // TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
 INSTANTIATE_TEST_CASE_P(
     DISABLED_SSSE3, VP9QuantizeTest,
     ::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
                                  &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
                                  false),
                       make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
                                  &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                                  VPX_BITS_8, 32, true)));
 #endif  // ARCH_X86_64
 #endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH

 // TODO(johannkoenig): AVX optimizations do not yet pass the 32x32 test or
 // highbitdepth configurations.
 #if HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     AVX, VP9QuantizeTest,
     ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
                                  VPX_BITS_8, 16, false),
                       // Even though SSSE3 and AVX do not match the reference
                       // code, we can keep them in sync with each other.
                       make_tuple(&vpx_quantize_b_32x32_avx,
                                  &vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
                                  false)));
 #endif  // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH

 // TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
     NEON, VP9QuantizeTest,
     ::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
                                  VPX_BITS_8, 16, false),
                       make_tuple(&vpx_quantize_b_32x32_neon,
                                  &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
                                  false),
                       make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
                                  &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
                                  16, true),
                       make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
                                  &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                                  VPX_BITS_8, 32, true)));
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH

 // Only useful to compare "Speed" test results.
 INSTANTIATE_TEST_CASE_P(
     DISABLED_C, VP9QuantizeTest,
     ::testing::Values(
         make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
         make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
                    32, false),
         make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
                    &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
         make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
                    &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
         make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                    &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                    true)));
 }  // namespace
	/*
	* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include <math.h>
	#include <stdlib.h>
	#include <string.h>

	#include "third_party/googletest/src/include/gtest/gtest.h"

	#include "./vp9_rtcd.h"
	#include "./vpx_config.h"
	#include "./vpx_dsp_rtcd.h"
	#include "test/acm_random.h"
	#include "test/buffer.h"
	#include "test/clear_system_state.h"
	#include "test/register_state_check.h"
	#include "test/util.h"
	#include "vp9/common/vp9_entropy.h"
	#include "vp9/common/vp9_scan.h"
	#include "vpx/vpx_codec.h"
	#include "vpx/vpx_integer.h"
	#include "vpx_ports/vpx_timer.h"

	using libvpx_test::ACMRandom;
	using libvpx_test::Buffer;

	namespace {
	const int number_of_iterations = 100;

	typedef void (QuantizeFunc)(const tran_low_t coeff, intptr_t count,
	int skip_block, const int16_t *zbin,
	const int16_t round, const int16_t quant,
	const int16_t quant_shift, tran_low_t qcoeff,
	tran_low_t dqcoeff, const int16_t dequant,
	uint16_t eob, const int16_t scan,
	const int16_t *iscan);
	typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
	int /max_size/, bool /is_fp/>
	QuantizeParam;

	// Wrapper for FP version which does not use zbin or quant_shift.
	typedef void (QuantizeFPFunc)(const tran_low_t coeff, intptr_t count,
	int skip_block, const int16_t *round,
	const int16_t quant, tran_low_t qcoeff,
	tran_low_t dqcoeff, const int16_t dequant,
	uint16_t eob, const int16_t scan,
	const int16_t *iscan);

	template <QuantizeFPFunc fn>
	void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
	const int16_t zbin, const int16_t round,
	const int16_t quant, const int16_t quant_shift,
	tran_low_t qcoeff, tran_low_t dqcoeff,
	const int16_t dequant, uint16_t eob, const int16_t *scan,
	const int16_t *iscan) {
	(void)zbin;
	(void)quant_shift;

	fn(coeff, count, skip_block, round, quant, qcoeff, dqcoeff, dequant, eob,
	scan, iscan);
	}

	class VP9QuantizeBase {
	public:
	VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
	: bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
	max_value_ = (1 << bit_depth_) - 1;
	zbin_ptr_ =
	reinterpret_cast<int16_t >(vpx_memalign(16, 8 sizeof(*zbin_ptr_)));
	round_fp_ptr_ = reinterpret_cast<int16_t *>(
	vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
	quant_fp_ptr_ = reinterpret_cast<int16_t *>(
	vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
	round_ptr_ =
	reinterpret_cast<int16_t >(vpx_memalign(16, 8 sizeof(*round_ptr_)));
	quant_ptr_ =
	reinterpret_cast<int16_t >(vpx_memalign(16, 8 sizeof(*quant_ptr_)));
	quant_shift_ptr_ = reinterpret_cast<int16_t *>(
	vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
	dequant_ptr_ = reinterpret_cast<int16_t *>(
	vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
	}

	~VP9QuantizeBase() {
	vpx_free(zbin_ptr_);
	vpx_free(round_fp_ptr_);
	vpx_free(quant_fp_ptr_);
	vpx_free(round_ptr_);
	vpx_free(quant_ptr_);
	vpx_free(quant_shift_ptr_);
	vpx_free(dequant_ptr_);
	zbin_ptr_ = NULL;
	round_fp_ptr_ = NULL;
	quant_fp_ptr_ = NULL;
	round_ptr_ = NULL;
	quant_ptr_ = NULL;
	quant_shift_ptr_ = NULL;
	dequant_ptr_ = NULL;
	libvpx_test::ClearSystemState();
	}

	protected:
	int16_t *zbin_ptr_;
	int16_t *round_fp_ptr_;
	int16_t *quant_fp_ptr_;
	int16_t *round_ptr_;
	int16_t *quant_ptr_;
	int16_t *quant_shift_ptr_;
	int16_t *dequant_ptr_;
	const vpx_bit_depth_t bit_depth_;
	int max_value_;
	const int max_size_;
	const bool is_fp_;
	};

	class VP9QuantizeTest : public VP9QuantizeBase,
	public ::testing::TestWithParam<QuantizeParam> {
	public:
	VP9QuantizeTest()
	: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
	quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}

	protected:
	const QuantizeFunc quantize_op_;
	const QuantizeFunc ref_quantize_op_;
	};

	// This quantizer compares the AC coefficients to the quantization step size to
	// determine if further multiplication operations are needed.
	// Based on vp9_quantize_fp_sse2().
	void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
	int skip_block, const int16_t *round_ptr,
	const int16_t quant_ptr, tran_low_t qcoeff_ptr,
	tran_low_t dqcoeff_ptr, const int16_t dequant_ptr,
	uint16_t eob_ptr, const int16_t scan,
	const int16_t *iscan) {
	int i, eob = -1;
	const int thr = dequant_ptr[1] >> 1;
	(void)iscan;
	(void)skip_block;
	assert(!skip_block);

	// Quantization pass: All coefficients with index >= zero_flag are
	// skippable. Note: zero_flag can be zero.
	for (i = 0; i < n_coeffs; i += 16) {
	int y;
	int nzflag_cnt = 0;
	int abs_coeff[16];
	int coeff_sign[16];

	// count nzflag for each row (16 tran_low_t)
	for (y = 0; y < 16; ++y) {
	const int rc = i + y;
	const int coeff = coeff_ptr[rc];
	coeff_sign[y] = (coeff >> 31);
	abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
	// The first 16 are skipped in the sse2 code. Do the same here to match.
	if (i >= 16 && (abs_coeff[y] <= thr)) {
	nzflag_cnt++;
	}
	}

	for (y = 0; y < 16; ++y) {
	const int rc = i + y;
	// If all of the AC coeffs in a row has magnitude less than the
	// quantization step_size/2, quantize to zero.
	if (nzflag_cnt < 16) {
	int tmp =
	clamp(abs_coeff[y] + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
	tmp = (tmp * quant_ptr[rc != 0]) >> 16;
	qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
	dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
	} else {
	qcoeff_ptr[rc] = 0;
	dqcoeff_ptr[rc] = 0;
	}
	}
	}

	// Scan for eob.
	for (i = 0; i < n_coeffs; i++) {
	// Use the scan order to find the correct eob.
	const int rc = scan[i];
	if (qcoeff_ptr[rc]) {
	eob = i;
	}
	}
	*eob_ptr = eob + 1;
	}

	void GenerateHelperArrays(ACMRandom rnd, int16_t zbin, int16_t *round,
	int16_t quant, int16_t quant_shift,
	int16_t dequant, int16_t round_fp,
	int16_t *quant_fp) {
	// Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
	const int max_qrounding_factor_fp = 64;

	for (int j = 0; j < 2; j++) {
	// The range is 4 to 1828 in the VP9 tables.
	const int qlookup = rnd->RandRange(1825) + 4;
	round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
	quant_fp[j] = (1 << 16) / qlookup;

	// Values determined by deconstructing vp9_init_quantizer().
	// zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
	// values or U/V values of any bit depth. This is because y_delta is not
	// factored into the vp9_ac_quant() call.
	zbin[j] = rnd->RandRange(1200);

	// round may be up to 685 for Y values or 914 for U/V.
	round[j] = rnd->RandRange(914);
	// quant ranges from 1 to -32703
	quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
	// quant_shift goes up to 1 << 16.
	quant_shift[j] = rnd->RandRange(16384);
	// dequant maxes out at 1828 for all cases.
	dequant[j] = rnd->RandRange(1828);
	}
	for (int j = 2; j < 8; j++) {
	zbin[j] = zbin[1];
	round_fp[j] = round_fp[1];
	quant_fp[j] = quant_fp[1];
	round[j] = round[1];
	quant[j] = quant[1];
	quant_shift[j] = quant_shift[1];
	dequant[j] = dequant[1];
	}
	}

	TEST_P(VP9QuantizeTest, OperationCheck) {
	ACMRandom rnd(ACMRandom::DeterministicSeed());
	Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
	ASSERT_TRUE(coeff.Init());
	Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(qcoeff.Init());
	Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(dqcoeff.Init());
	Buffer<tran_low_t> ref_qcoeff =
	Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(ref_qcoeff.Init());
	Buffer<tran_low_t> ref_dqcoeff =
	Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(ref_dqcoeff.Init());
	uint16_t eob, ref_eob;

	for (int i = 0; i < number_of_iterations; ++i) {
	// Test skip block for the first three iterations to catch all the different
	// sizes.
	const int skip_block = 0;
	TX_SIZE sz;
	if (max_size_ == 16) {
	sz = static_cast<TX_SIZE>(i % 3); // TX_4X4, TX_8X8 TX_16X16
	} else {
	sz = TX_32X32;
	}
	const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
	const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
	const int count = (4 << sz) * (4 << sz);
	coeff.Set(&rnd, -max_value_, max_value_);
	GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
	quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
	quant_fp_ptr_);
	int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
	int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
	ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
	q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
	ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
	scan_order->scan, scan_order->iscan);

	ASM_REGISTER_STATE_CHECK(quantize_op_(
	coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
	quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
	dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));

	EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
	EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));

	EXPECT_EQ(eob, ref_eob);

	if (HasFailure()) {
	printf("Failure on iteration %d.\n", i);
	qcoeff.PrintDifference(ref_qcoeff);
	dqcoeff.PrintDifference(ref_dqcoeff);
	return;
	}
	}
	}

	TEST_P(VP9QuantizeTest, EOBCheck) {
	ACMRandom rnd(ACMRandom::DeterministicSeed());
	Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
	ASSERT_TRUE(coeff.Init());
	Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(qcoeff.Init());
	Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(dqcoeff.Init());
	Buffer<tran_low_t> ref_qcoeff =
	Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(ref_qcoeff.Init());
	Buffer<tran_low_t> ref_dqcoeff =
	Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(ref_dqcoeff.Init());
	uint16_t eob, ref_eob;

	for (int i = 0; i < number_of_iterations; ++i) {
	const int skip_block = 0;
	TX_SIZE sz;
	if (max_size_ == 16) {
	sz = static_cast<TX_SIZE>(i % 3); // TX_4X4, TX_8X8 TX_16X16
	} else {
	sz = TX_32X32;
	}
	const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
	const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
	int count = (4 << sz) * (4 << sz);
	// Two random entries
	coeff.Set(0);
	coeff.TopLeftPixel()[rnd(count)] =
	static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
	coeff.TopLeftPixel()[rnd(count)] =
	static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
	GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
	quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
	quant_fp_ptr_);
	int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
	int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
	ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
	q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
	ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
	scan_order->scan, scan_order->iscan);

	ASM_REGISTER_STATE_CHECK(quantize_op_(
	coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
	quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
	dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));

	EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
	EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));

	EXPECT_EQ(eob, ref_eob);

	if (HasFailure()) {
	printf("Failure on iteration %d.\n", i);
	qcoeff.PrintDifference(ref_qcoeff);
	dqcoeff.PrintDifference(ref_dqcoeff);
	return;
	}
	}
	}

	TEST_P(VP9QuantizeTest, DISABLED_Speed) {
	ACMRandom rnd(ACMRandom::DeterministicSeed());
	Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
	ASSERT_TRUE(coeff.Init());
	Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(qcoeff.Init());
	Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
	ASSERT_TRUE(dqcoeff.Init());
	uint16_t eob;
	TX_SIZE starting_sz, ending_sz;

	if (max_size_ == 16) {
	starting_sz = TX_4X4;
	ending_sz = TX_16X16;
	} else {
	starting_sz = TX_32X32;
	ending_sz = TX_32X32;
	}

	for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
	// zbin > coeff, zbin < coeff.
	for (int i = 0; i < 2; ++i) {
	const int skip_block = 0;
	// TX_TYPE defines the scan order. That is not relevant to the speed test.
	// Pick the first one.
	const TX_TYPE tx_type = DCT_DCT;
	const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
	const int count = (4 << sz) * (4 << sz);

	GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
	quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
	quant_fp_ptr_);
	int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
	int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;

	if (i == 0) {
	// When \|coeff values\| are less than zbin the results are 0.
	int threshold = 100;
	if (max_size_ == 32) {
	// For 32x32, the threshold is halved. Double it to keep the values
	// from clearing it.
	threshold = 200;
	}
	for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
	coeff.Set(&rnd, -99, 99);
	} else if (i == 1) {
	for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
	coeff.Set(&rnd, -500, 500);
	}

	vpx_usec_timer timer;
	vpx_usec_timer_start(&timer);
	for (int j = 0; j < 100000000 / count; ++j) {
	quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
	q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
	dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
	scan_order->scan, scan_order->iscan);
	}
	vpx_usec_timer_mark(&timer);
	const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
	if (i == 0) printf("Bypass calculations.\n");
	if (i == 1) printf("Full calculations.\n");
	printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
	elapsed_time / 1000);
	}
	printf("\n");
	}
	}

	using std::tr1::make_tuple;

	#if HAVE_SSE2
	#if CONFIG_VP9_HIGHBITDEPTH
	// TODO(johannkoenig): Fix vpx_quantize_b_sse2 in highbitdepth builds.
	// make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_8),
	INSTANTIATE_TEST_CASE_P(
	SSE2, VP9QuantizeTest,
	::testing::Values(
	make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
	VPX_BITS_8, 16, false),
	make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
	VPX_BITS_10, 16, false),
	make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
	VPX_BITS_12, 16, false),
	make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
	&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
	make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
	&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
	make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
	&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));

	#else
	INSTANTIATE_TEST_CASE_P(
	SSE2, VP9QuantizeTest,
	::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
	VPX_BITS_8, 16, false),
	make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
	&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
	16, true)));
	#endif // CONFIG_VP9_HIGHBITDEPTH
	#endif // HAVE_SSE2

	#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
	#if ARCH_X86_64
	INSTANTIATE_TEST_CASE_P(
	SSSE3, VP9QuantizeTest,
	::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
	VPX_BITS_8, 16, false),
	make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
	&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
	16, true)));
	#else
	INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
	::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
	&vpx_quantize_b_c,
	VPX_BITS_8, 16, false)));
	#endif

	#if ARCH_X86_64
	// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
	INSTANTIATE_TEST_CASE_P(
	DISABLED_SSSE3, VP9QuantizeTest,
	::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
	&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
	false),
	make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
	&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
	VPX_BITS_8, 32, true)));
	#endif // ARCH_X86_64
	#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH

	// TODO(johannkoenig): AVX optimizations do not yet pass the 32x32 test or
	// highbitdepth configurations.
	#if HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
	INSTANTIATE_TEST_CASE_P(
	AVX, VP9QuantizeTest,
	::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
	VPX_BITS_8, 16, false),
	// Even though SSSE3 and AVX do not match the reference
	// code, we can keep them in sync with each other.
	make_tuple(&vpx_quantize_b_32x32_avx,
	&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
	false)));
	#endif // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH

	// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
	#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
	INSTANTIATE_TEST_CASE_P(
	NEON, VP9QuantizeTest,
	::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
	VPX_BITS_8, 16, false),
	make_tuple(&vpx_quantize_b_32x32_neon,
	&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
	false),
	make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
	&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
	16, true),
	make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
	&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
	VPX_BITS_8, 32, true)));
	#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH

	// Only useful to compare "Speed" test results.
	INSTANTIATE_TEST_CASE_P(
	DISABLED_C, VP9QuantizeTest,
	::testing::Values(
	make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
	make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
	32, false),
	make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
	&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
	make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
	&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
	make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
	&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
	true)));
	} // namespace