Change inverse HT function argument from TXFM_2D_CFG* to int

This change has no performance impact. It prepares the proper function
interface for better performance optimization.

Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6

commit: cd8cfb86750ab9dd1bbab788b2f1dee41f84e1da [log] [tgz]
author: Yi Luo <luoyi@google.com> Mon May 09 18:34:16 2016 -0700
committer: Yi Luo <luoyi@google.com> Mon May 09 18:34:16 2016 -0700
tree: aa620aa0cecca5213ca3812f184cd882a7dcc070
parent: 1d2d1e752eac60eae0390c07962ee9b91b8af85d [diff]
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc
index c3552dc..80ac78b 100644
--- a/test/vp10_inv_txfm2d_test.cc
+++ b/test/vp10_inv_txfm2d_test.cc

@@ -84,7 +84,7 @@
           }
 
           fwd_txfm_func(input, output, txfm_size, tx_type, bd);
-          inv_txfm_func(output, ref_input, txfm_size, inv_txfm_cfg, bd);
+          inv_txfm_func(output, ref_input, txfm_size, tx_type, bd);
 
           for (int ni = 0; ni < sqr_txfm_size; ++ni) {
             EXPECT_LE(abs(input[ni] - ref_input[ni]), 4);

diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h
index 6b0bd0a..c4d03ce 100644
--- a/test/vp10_txfm_test.h
+++ b/test/vp10_txfm_test.h

@@ -104,10 +104,8 @@
 typedef void (*TxfmFunc)(const int32_t* in, int32_t* out, const int8_t* cos_bit,
                          const int8_t* range_bit);
 
-typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, const int,
-                                int tx_type, const int);
-typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, const int,
-                                const TXFM_2D_CFG*, const int);
+typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, int, int, int);
+typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, int, int, int);
 
 static const int bd = 10;
 static const int input_base = (1 << bd);

diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index b7da81b..ab17cca 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c

@@ -1302,20 +1302,11 @@
 
   switch (tx_type) {
     case DCT_DCT:
-      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_dct_4, bd);
-      break;
     case ADST_DCT:
-      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_dct_4, bd);
-      break;
     case DCT_ADST:
-      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_adst_4, bd);
-      break;
     case ADST_ADST:
       vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_adst_4, bd);
+                              tx_type, bd);
       break;
 #if CONFIG_EXT_TX
     case FLIPADST_DCT:
@@ -1350,20 +1341,11 @@
   (void)eob;
   switch (tx_type) {
     case DCT_DCT:
-      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_dct_8, bd);
-      break;
     case ADST_DCT:
-      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_dct_8, bd);
-      break;
     case DCT_ADST:
-      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_adst_8, bd);
-      break;
     case ADST_ADST:
       vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_adst_8, bd);
+                              tx_type, bd);
       break;
 #if CONFIG_EXT_TX
     case FLIPADST_DCT:
@@ -1398,20 +1380,11 @@
   (void)eob;
   switch (tx_type) {
     case DCT_DCT:
-      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_dct_16, bd);
-      break;
     case ADST_DCT:
-      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_adst_dct_16, bd);
-      break;
     case DCT_ADST:
-      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_adst_16, bd);
-      break;
     case ADST_ADST:
       vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_adst_adst_16, bd);
+                                tx_type, bd);
       break;
 #if CONFIG_EXT_TX
     case FLIPADST_DCT:
@@ -1447,7 +1420,7 @@
   switch (tx_type) {
     case DCT_DCT:
       vp10_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_dct_32, bd);
+                                DCT_DCT, bd);
       break;
 #if CONFIG_EXT_TX
     case ADST_DCT:

diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c
index 5227fc8..3ae54c9 100644
--- a/vp10/common/vp10_inv_txfm2d.c
+++ b/vp10/common/vp10_inv_txfm2d.c

@@ -8,8 +8,10 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "vp10/common/enums.h"
 #include "vp10/common/vp10_txfm.h"
 #include "vp10/common/vp10_inv_txfm1d.h"
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
 
 static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
   switch (txfm_type) {
@@ -46,6 +48,105 @@
   }
 }
 
+static const TXFM_2D_CFG* vp10_get_inv_txfm_4x4_cfg(int tx_type) {
+  const TXFM_2D_CFG* cfg = NULL;  // stays NULL if no case below matches
+  switch (tx_type) {  // map tx_type to the matching 4x4 inverse config
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_4;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_4;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_4;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      break;
+    default:
+      assert(0);  // any other tx_type has no 4x4 config in this lookup
+  }
+  return cfg;  // NULL only when assert is compiled out and tx_type is unknown
+}
+
+static const TXFM_2D_CFG* vp10_get_inv_txfm_8x8_cfg(int tx_type) {
+  const TXFM_2D_CFG* cfg = NULL;  // stays NULL if no case below matches
+  switch (tx_type) {  // map tx_type to the matching 8x8 inverse config
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_8;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_8;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_8;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      break;
+    default:
+      assert(0);  // any other tx_type has no 8x8 config in this lookup
+  }
+  return cfg;  // NULL only when assert is compiled out and tx_type is unknown
+}
+
+static const TXFM_2D_CFG* vp10_get_inv_txfm_16x16_cfg(int tx_type) {
+  const TXFM_2D_CFG* cfg = NULL;  // stays NULL if no case below matches
+  switch (tx_type) {  // map tx_type to the matching 16x16 inverse config
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_16;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_16;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_16;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      break;
+    default:
+      assert(0);  // any other tx_type has no 16x16 config in this lookup
+  }
+  return cfg;  // NULL only when assert is compiled out and tx_type is unknown
+}
+
+static const TXFM_2D_CFG* vp10_get_inv_txfm_32x32_cfg(int tx_type) {
+  const TXFM_2D_CFG* cfg = NULL;  // stays NULL if no case below matches
+  switch (tx_type) {  // map tx_type to the matching 32x32 inverse config
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_32;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_32;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_32;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_32;
+      break;
+    default:
+      assert(0);  // any other tx_type has no 32x32 config in this lookup
+  }
+  return cfg;  // NULL only when assert is compiled out and tx_type is unknown
+}
+
+static const TXFM_2D_CFG* vp10_get_inv_txfm_64x64_cfg(int tx_type) {
+  const TXFM_2D_CFG* cfg = NULL;  // stays NULL unless tx_type is DCT_DCT
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_64;
+      break;  // keep the one supported case from falling into assert(0)
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    default:
+      assert(0);  // this lookup has a 64x64 config for DCT_DCT only
+  }
+  return cfg;
+}
+
 static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
                                     int stride, const TXFM_2D_CFG *cfg,
                                     int32_t *txfm_buf) {
@@ -86,61 +187,66 @@
 }
 
 void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
-                             const int stride, const TXFM_2D_CFG *cfg,
-                             const int bd) {
+                               int stride, int tx_type,
+                               int bd) {
   int txfm_buf[4 * 4 + 4 + 4];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_4x4_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
-                             const int stride, const TXFM_2D_CFG *cfg,
-                             const int bd) {
+                               int stride, int tx_type,
+                               int bd) {
   int txfm_buf[8 * 8 + 8 + 8];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_8x8_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
-                               const int stride, const TXFM_2D_CFG *cfg,
-                               const int bd) {
+                                 int stride, int tx_type,
+                                 int bd) {
   int txfm_buf[16 * 16 + 16 + 16];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_16x16_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
-                               const int stride, const TXFM_2D_CFG *cfg,
-                               const int bd) {
+                                 int stride, int tx_type,
+                                 int bd) {
   int txfm_buf[32 * 32 + 32 + 32];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_32x32_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
-                               const int stride, const TXFM_2D_CFG *cfg,
-                               const int bd) {
+                                 int stride, int tx_type,
+                                 int bd) {
   int txfm_buf[64 * 64 + 64 + 64];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
 }

diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index ae0d2cb..0e59bfe 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl

@@ -626,15 +626,15 @@
   specialize qw/vp10_fwd_txfm2d_64x64 sse4_1/;
 
   #inv txfm
-  add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_4x4/;
-  add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_8x8/;
-  add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_16x16/;
-  add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_32x32/;
-  add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_64x64/;
 }
commit	cd8cfb86750ab9dd1bbab788b2f1dee41f84e1da	[log] [tgz]
author	Yi Luo <luoyi@google.com>	Mon May 09 18:34:16 2016 -0700
committer	Yi Luo <luoyi@google.com>	Mon May 09 18:34:16 2016 -0700
tree	aa620aa0cecca5213ca3812f184cd882a7dcc070
parent	1d2d1e752eac60eae0390c07962ee9b91b8af85d [diff]