Adding ss_size_lookup table.

Removing the old bsize_from_dim_lookup table. The block size of a plane is
now determined from its subsampling values (ss_size_lookup), and the number
of pixels in that block is then obtained via num_pels_log2_lookup.

Change-Id: I6fc981da2ae093de81741d3d78eaefed11015db9
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index aab47e4..a429554 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -433,6 +433,17 @@
   return res;
 }
 
+// Returns the block size that bsize corresponds to in a plane with pd's
+// subsampling, as given by ss_size_lookup. Asserts that the combination is
+// valid, i.e. that the table entry is not BLOCK_INVALID.
+static INLINE BLOCK_SIZE_TYPE get_plane_block_size(
+    BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane *pd) {
+  const BLOCK_SIZE_TYPE bs =
+      ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
+  assert(bs < BLOCK_SIZE_TYPES);
+  return bs;
+}
+
 static INLINE int plane_block_width(BLOCK_SIZE_TYPE bsize,
                                     const struct macroblockd_plane* plane) {
   return 4 << (b_width_log2(bsize) - plane->subsampling_x);
@@ -695,10 +703,11 @@
                                    int eob, int aoff, int loff,
                                    ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
   struct macroblockd_plane *pd = &xd->plane[plane];
+  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
+  int mi_blocks_wide = num_4x4_blocks_wide_lookup[bs];
+  int mi_blocks_high = num_4x4_blocks_high_lookup[bs];
   int above_contexts = tx_size_in_blocks;
   int left_contexts = tx_size_in_blocks;
-  int mi_blocks_wide = 1 << plane_block_width_log2by4(bsize, pd);
-  int mi_blocks_high = 1 << plane_block_height_log2by4(bsize, pd);
   int pt;
 
   // xd->mb_to_right_edge is in units of pixels * 8.  This converts
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c
index fdf37e4..399391a 100644
--- a/vp9/common/vp9_common_data.c
+++ b/vp9/common/vp9_common_data.c
@@ -76,49 +76,60 @@
 
 const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = {
   {     // PARTITION_NONE
-    BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
-    BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
+    BLOCK_4X4,   BLOCK_4X8,   BLOCK_8X4,
+    BLOCK_8X8,   BLOCK_8X16,  BLOCK_16X8,
     BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
     BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
     BLOCK_64X64,
   }, {  // PARTITION_HORZ
-    BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_8X4,     BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_16X8,    BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_32X16,   BLOCK_INVALID, BLOCK_INVALID,
     BLOCK_64X32,
   }, {  // PARTITION_VERT
-    BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_4X8,     BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_8X16,    BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_16X32,   BLOCK_INVALID, BLOCK_INVALID,
     BLOCK_32X64,
   }, {  // PARTITION_SPLIT
-    BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
-    BLOCK_16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_4X4,     BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_8X8,     BLOCK_INVALID, BLOCK_INVALID,
+    BLOCK_16X16,   BLOCK_INVALID, BLOCK_INVALID,
     BLOCK_32X32,
   }
 };
 
 const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES] = {
-  TX_4X4, TX_4X4, TX_4X4,
-  TX_8X8, TX_8X8, TX_8X8,
+  TX_4X4,   TX_4X4,   TX_4X4,
+  TX_8X8,   TX_8X8,   TX_8X8,
   TX_16X16, TX_16X16, TX_16X16,
   TX_32X32, TX_32X32, TX_32X32, TX_32X32
 };
 const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = {
-  TX_4X4, TX_4X4, TX_4X4,
-  TX_4X4, TX_4X4, TX_4X4,
-  TX_8X8, TX_8X8, TX_8X8,
+  TX_4X4,   TX_4X4,   TX_4X4,
+  TX_4X4,   TX_4X4,   TX_4X4,
+  TX_8X8,   TX_8X8,   TX_8X8,
   TX_16X16, TX_16X16, TX_16X16, TX_32X32
 };
 
-const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = {
-  { BLOCK_4X4,   BLOCK_4X8,   BLOCK_4X8,   BLOCK_4X8,   BLOCK_4X8 },
-  { BLOCK_8X4,   BLOCK_8X8,   BLOCK_8X16,  BLOCK_8X16,  BLOCK_8X16 },
-  { BLOCK_16X8,  BLOCK_16X8,  BLOCK_16X16, BLOCK_16X32, BLOCK_16X32 },
-  { BLOCK_32X16, BLOCK_32X16, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64 },
-  { BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X64 }
+const BLOCK_SIZE_TYPE ss_size_lookup[BLOCK_SIZE_TYPES][2][2] = {
+//  Indexed [bsize][ss_x][ss_y]; BLOCK_INVALID marks unsupported combinations.
+//  [0][0]       [0][1]           [1][0]         [1][1]
+  {{BLOCK_4X4,   BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}},
+  {{BLOCK_4X8,   BLOCK_4X4},     {BLOCK_INVALID, BLOCK_INVALID}},
+  {{BLOCK_8X4,   BLOCK_INVALID}, {BLOCK_4X4,     BLOCK_INVALID}},
+  {{BLOCK_8X8,   BLOCK_8X4},     {BLOCK_4X8,     BLOCK_4X4}},
+  {{BLOCK_8X16,  BLOCK_8X8},     {BLOCK_INVALID, BLOCK_4X8}},
+  {{BLOCK_16X8,  BLOCK_INVALID}, {BLOCK_8X8,     BLOCK_8X4}},
+  {{BLOCK_16X16, BLOCK_16X8},    {BLOCK_8X16,    BLOCK_8X8}},
+  {{BLOCK_16X32, BLOCK_16X16},   {BLOCK_INVALID, BLOCK_8X16}},
+  {{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16,   BLOCK_16X8}},
+  {{BLOCK_32X32, BLOCK_32X16},   {BLOCK_16X32,   BLOCK_16X16}},
+  {{BLOCK_32X64, BLOCK_32X32},   {BLOCK_INVALID, BLOCK_16X32}},
+  {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32,   BLOCK_32X16}},
+  {{BLOCK_64X64, BLOCK_64X32},   {BLOCK_32X64,   BLOCK_32X32}},
 };
+
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index bc8c01a..cfa4cd5 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -27,6 +27,6 @@
 extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES];
 extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES];
 extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
-extern const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5];
+extern const BLOCK_SIZE_TYPE ss_size_lookup[BLOCK_SIZE_TYPES][2][2];
 
 #endif    // VP9_COMMON_VP9_COMMON_DATA_H
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 5af52c6..9d0dfa5 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -35,7 +35,8 @@
   BLOCK_32X64,
   BLOCK_64X32,
   BLOCK_64X64,
-  BLOCK_SIZE_TYPES
+  BLOCK_SIZE_TYPES,
+  BLOCK_INVALID = BLOCK_SIZE_TYPES
 } BLOCK_SIZE_TYPE;
 
 typedef enum PARTITION_TYPE {
@@ -43,7 +44,8 @@
   PARTITION_HORZ,
   PARTITION_VERT,
   PARTITION_SPLIT,
-  PARTITION_TYPES, PARTITION_INVALID = PARTITION_TYPES
+  PARTITION_TYPES,
+  PARTITION_INVALID = PARTITION_TYPES
 } PARTITION_TYPE;
 
 #define PARTITION_PLOFFSET   4  // number of probability models per block size
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 90d35f8..3cc526d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -277,16 +277,6 @@
   }
 }
 
-static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
-  return bsize_from_dim_lookup[bwl][bhl];
-}
-
-static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
-                                            struct macroblockd_plane *pd) {
-  return get_block_size(plane_block_width_log2by4(bsize, pd),
-                        plane_block_height_log2by4(bsize, pd));
-}
-
 static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                        const double *tab1, const double *tab2,
                                        double *v1, double *v2) {
@@ -399,18 +389,14 @@
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     struct macroblock_plane *const p = &x->plane[i];
     struct macroblockd_plane *const pd = &xd->plane[i];
-
-    // TODO(dkovalev) the same code in get_plane_block_size
-    const int bwl = plane_block_width_log2by4(bsize, pd);
-    const int bhl = plane_block_height_log2by4(bsize, pd);
-    const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
+    const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
     unsigned int sse;
     int rate;
     int64_t dist;
     (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                               pd->dst.buf, pd->dst.stride, &sse);
     // sse works better than var, since there is no dc prediction used
-    model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
+    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                              pd->dequant[1] >> 3, &rate, &dist);
 
     rate_sum += rate;
@@ -630,44 +616,46 @@
 // with that of luma component. this function should be deprecated afterwards.
 static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                         BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
-  MACROBLOCKD * const xd = &x->e_mbd;
-  const int bwl = plane_block_width_log2by4(bsize, &xd->plane[plane]);
-  const int bhl = plane_block_height_log2by4(bsize, &xd->plane[plane]);
-  const int bw = 1 << bwl, bh = 1 << bhl;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblockd_plane *pd = &xd->plane[plane];
+  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
+  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
+  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
   int i;
-  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
-    0, 0, 0, INT64_MAX, 0 };
+  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size,
+                                    num_4x4_blocks_wide, num_4x4_blocks_high,
+                                    0, 0, 0, INT64_MAX, 0 };
 
   switch (tx_size) {
     case TX_4X4:
-      vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
-                 sizeof(ENTROPY_CONTEXT) * bw);
-      vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
-                 sizeof(ENTROPY_CONTEXT) * bh);
+      vpx_memcpy(&args.t_above, pd->above_context,
+                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
+      vpx_memcpy(&args.t_left, pd->left_context,
+                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
       args.scan = vp9_default_scan_4x4;
       args.nb = vp9_default_scan_4x4_neighbors;
       break;
     case TX_8X8:
-      for (i = 0; i < bw; i += 2)
-        args.t_above[i] = !!*(uint16_t *)&xd->plane[plane].above_context[i];
-      for (i = 0; i < bh; i += 2)
-        args.t_left[i] = !!*(uint16_t *)&xd->plane[plane].left_context[i];
+      for (i = 0; i < num_4x4_blocks_wide; i += 2)
+        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
+      for (i = 0; i < num_4x4_blocks_high; i += 2)
+        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
       args.scan = vp9_default_scan_8x8;
       args.nb = vp9_default_scan_8x8_neighbors;
       break;
     case TX_16X16:
-      for (i = 0; i < bw; i += 4)
-        args.t_above[i] = !!*(uint32_t *)&xd->plane[plane].above_context[i];
-      for (i = 0; i < bh; i += 4)
-        args.t_left[i] = !!*(uint32_t *)&xd->plane[plane].left_context[i];
+      for (i = 0; i < num_4x4_blocks_wide; i += 4)
+        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
+      for (i = 0; i < num_4x4_blocks_high; i += 4)
+        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
       args.scan = vp9_default_scan_16x16;
       args.nb = vp9_default_scan_16x16_neighbors;
       break;
     case TX_32X32:
-      for (i = 0; i < bw; i += 8)
-        args.t_above[i] = !!*(uint64_t *)&xd->plane[plane].above_context[i];
-      for (i = 0; i < bh; i += 8)
-        args.t_left[i] = !!*(uint64_t *)&xd->plane[plane].left_context[i];
+      for (i = 0; i < num_4x4_blocks_wide; i += 8)
+        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
+      for (i = 0; i < num_4x4_blocks_high; i += 8)
+        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
       args.scan = vp9_default_scan_32x32;
       args.nb = vp9_default_scan_32x32_neighbors;
       break;
@@ -696,11 +684,10 @@
 
   *sse = 0;
   for (plane = 1; plane < MAX_MB_PLANE; plane++) {
-    struct macroblockd_plane *p = &x->e_mbd.plane[plane];
-    const int bwl = plane_block_width_log2by4(bsize, p);
-    const int bhl = plane_block_height_log2by4(bsize, p);
-    sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
-                           16 << (bwl + bhl), &this_sse);
+    struct macroblockd_plane *pd = &x->e_mbd.plane[plane];
+    const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
+    sum += vp9_block_error(x->plane[plane].coeff, pd->dqcoeff,
+                           1 << num_pels_log2_lookup[bs], &this_sse);
     *sse += this_sse;
   }
   *sse >>= shift;
@@ -744,42 +731,43 @@
                                      BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[0];
-  const int bwl = plane_block_width_log2by4(bsize, pd);
-  const int bhl = plane_block_height_log2by4(bsize, pd);
-  const int bw = 1 << bwl, bh = 1 << bhl;
+  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
+  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
+  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
   int i;
-  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
+  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size,
+                                    num_4x4_blocks_wide, num_4x4_blocks_high,
                                     0, 0, 0, ref_best_rd, 0 };
   xd->mode_info_context->mbmi.txfm_size = tx_size;
   switch (tx_size) {
     case TX_4X4:
       vpx_memcpy(&args.t_above, pd->above_context,
-                 sizeof(ENTROPY_CONTEXT) * bw);
+                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
       vpx_memcpy(&args.t_left, pd->left_context,
-                 sizeof(ENTROPY_CONTEXT) * bh);
+                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
       get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, 0),
                       &args.scan, &args.nb);
       break;
     case TX_8X8:
-      for (i = 0; i < bw; i += 2)
+      for (i = 0; i < num_4x4_blocks_wide; i += 2)
         args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
-      for (i = 0; i < bh; i += 2)
+      for (i = 0; i < num_4x4_blocks_high; i += 2)
         args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
       get_scan_nb_8x8(get_tx_type_8x8(PLANE_TYPE_Y_WITH_DC, xd),
                       &args.scan, &args.nb);
       break;
     case TX_16X16:
-      for (i = 0; i < bw; i += 4)
+      for (i = 0; i < num_4x4_blocks_wide; i += 4)
         args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
-      for (i = 0; i < bh; i += 4)
+      for (i = 0; i < num_4x4_blocks_high; i += 4)
         args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
       get_scan_nb_16x16(get_tx_type_16x16(PLANE_TYPE_Y_WITH_DC, xd),
                         &args.scan, &args.nb);
       break;
     case TX_32X32:
-      for (i = 0; i < bw; i += 8)
+      for (i = 0; i < num_4x4_blocks_wide; i += 8)
         args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
-      for (i = 0; i < bh; i += 8)
+      for (i = 0; i < num_4x4_blocks_high; i += 8)
         args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
       args.scan = vp9_default_scan_32x32;
       args.nb = vp9_default_scan_32x32_neighbors;