Palette: remove palette_first_color_idx[] in PALETTE_MODE_INFO

Handle the first color index in the tokenization process, along with the
other color indices.

This patch also includes some minor refactoring changes.

Test results verify that there is no impact on compression efficiency.

Change-Id: I7de51c18a52f337320331b5e8d63dfea3cf510f0
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index b3838de..4488210 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -297,9 +297,6 @@
   uint8_t palette_size[2];
   // Value of base colors for Y, U, and V
   uint16_t palette_colors[3 * PALETTE_MAX_SIZE];
-  // Only used by encoder to store the color index of the top left pixel.
-  // TODO(huisu): move this to encoder
-  uint8_t palette_first_color_idx[2];
 } PALETTE_MODE_INFO;
 #endif  // CONFIG_PALETTE
 
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 166721b..05fdc46 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -35,19 +35,6 @@
 
 #define DEC_MISMATCH_DEBUG 0
 
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
-static INLINE int read_uniform(aom_reader *r, int n) {
-  const int l = get_unsigned_bits(n);
-  const int m = (1 << l) - n;
-  const int v = aom_read_literal(r, l - 1, ACCT_STR);
-  assert(l != 0);
-  if (v < m)
-    return v;
-  else
-    return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR);
-}
-#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
-
 static PREDICTION_MODE read_intra_mode(aom_reader *r, aom_cdf_prob *cdf) {
   return (PREDICTION_MODE)
       av1_intra_mode_inv[aom_read_symbol(r, cdf, INTRA_MODES, ACCT_STR)];
@@ -809,17 +796,18 @@
   const MODE_INFO *const above_mi = xd->above_mi;
   const MODE_INFO *const left_mi = xd->left_mi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
-  int n;
   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
 
   if (mbmi->mode == DC_PRED) {
     int palette_y_mode_ctx = 0;
-    if (above_mi)
+    if (above_mi) {
       palette_y_mode_ctx +=
           (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
-    if (left_mi)
+    }
+    if (left_mi) {
       palette_y_mode_ctx +=
           (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+    }
     if (aom_read(r, av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                    [palette_y_mode_ctx],
                  ACCT_STR)) {
@@ -835,16 +823,12 @@
                         ACCT_STR) +
           2;
 #endif
-      n = pmi->palette_size[0];
 #if CONFIG_PALETTE_DELTA_ENCODING
       read_palette_colors_y(xd, cm->bit_depth, pmi, r);
 #else
-      int i;
-      for (i = 0; i < n; ++i)
+      for (int i = 0; i < pmi->palette_size[0]; ++i)
         pmi->palette_colors[i] = aom_read_literal(r, cm->bit_depth, ACCT_STR);
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
-      xd->plane[0].color_index_map[0] = read_uniform(r, n);
-      assert(xd->plane[0].color_index_map[0] < n);
     }
   }
 
@@ -864,20 +848,16 @@
                         ACCT_STR) +
           2;
 #endif
-      n = pmi->palette_size[1];
 #if CONFIG_PALETTE_DELTA_ENCODING
       read_palette_colors_uv(xd, cm->bit_depth, pmi, r);
 #else
-      int i;
-      for (i = 0; i < n; ++i) {
+      for (int i = 0; i < pmi->palette_size[1]; ++i) {
         pmi->palette_colors[PALETTE_MAX_SIZE + i] =
             aom_read_literal(r, cm->bit_depth, ACCT_STR);
         pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] =
             aom_read_literal(r, cm->bit_depth, ACCT_STR);
       }
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
-      xd->plane[1].color_index_map[0] = read_uniform(r, n);
-      assert(xd->plane[1].color_index_map[0] < n);
     }
   }
 }
@@ -902,7 +882,7 @@
         aom_read(r, cm->fc->filter_intra_probs[0], ACCT_STR);
     if (filter_intra_mode_info->use_filter_intra_mode[0]) {
       filter_intra_mode_info->filter_intra_mode[0] =
-          read_uniform(r, FILTER_INTRA_MODES);
+          av1_read_uniform(r, FILTER_INTRA_MODES);
     }
     if (counts) {
       ++counts
@@ -929,7 +909,7 @@
         aom_read(r, cm->fc->filter_intra_probs[1], ACCT_STR);
     if (filter_intra_mode_info->use_filter_intra_mode[1]) {
       filter_intra_mode_info->filter_intra_mode[1] =
-          read_uniform(r, FILTER_INTRA_MODES);
+          av1_read_uniform(r, FILTER_INTRA_MODES);
     }
     if (counts) {
       ++counts
@@ -959,7 +939,7 @@
 
   if (av1_is_directional_mode(mbmi->mode, bsize)) {
     mbmi->angle_delta[0] =
-        read_uniform(r, 2 * MAX_ANGLE_DELTA + 1) - MAX_ANGLE_DELTA;
+        av1_read_uniform(r, 2 * MAX_ANGLE_DELTA + 1) - MAX_ANGLE_DELTA;
 #if CONFIG_INTRA_INTERP
     p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
     if (av1_is_intra_filter_switchable(p_angle)) {
@@ -975,7 +955,7 @@
 
   if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
     mbmi->angle_delta[1] =
-        read_uniform(r, 2 * MAX_ANGLE_DELTA + 1) - MAX_ANGLE_DELTA;
+        av1_read_uniform(r, 2 * MAX_ANGLE_DELTA + 1) - MAX_ANGLE_DELTA;
   }
 }
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 6f62911..7a6e7cd 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -215,6 +215,20 @@
 }
 #endif  // CONFIG_EXT_REFS
 
+#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
+#define ACCT_STR __func__
+static INLINE int av1_read_uniform(aom_reader *r, int n) {
+  const int l = get_unsigned_bits(n);
+  const int m = (1 << l) - n;
+  const int v = aom_read_literal(r, l - 1, ACCT_STR);
+  assert(l != 0);
+  if (v < m)
+    return v;
+  else
+    return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR);
+}
+#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index ce0b70c..8e30139 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -16,6 +16,7 @@
 #endif  // !CONFIG_PVQ
 
 #include "av1/common/blockd.h"
+#include "av1/decoder/detokenize.h"
 
 #define ACCT_STR __func__
 
@@ -23,7 +24,6 @@
 #include "av1/common/common.h"
 #include "av1/common/entropy.h"
 #include "av1/common/idct.h"
-#include "av1/decoder/detokenize.h"
 
 #define EOB_CONTEXT_NODE 0
 #define ZERO_CONTEXT_NODE 1
@@ -225,7 +225,6 @@
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
   uint8_t color_order[PALETTE_MAX_SIZE];
   const int n = mbmi->palette_mode_info.palette_size[plane];
-  int i, j;
   uint8_t *const color_map = xd->plane[plane].color_index_map;
 #if CONFIG_NEW_MULTISYMBOL
   aom_cdf_prob(
@@ -244,10 +243,14 @@
                            &plane_block_height, &rows, &cols);
   assert(plane == 0 || plane == 1);
 
+  // The first color index.
+  color_map[0] = av1_read_uniform(r, n);
+  assert(color_map[0] < n);
+
 #if CONFIG_PALETTE_THROUGHPUT
   // Run wavefront on the palette map index decoding.
-  for (i = 1; i < rows + cols - 1; ++i) {
-    for (j = AOMMIN(i, cols - 1); j >= AOMMAX(0, i - rows + 1); --j) {
+  for (int i = 1; i < rows + cols - 1; ++i) {
+    for (int j = AOMMIN(i, cols - 1); j >= AOMMAX(0, i - rows + 1); --j) {
       const int color_ctx = av1_get_palette_color_index_context(
           color_map, plane_block_width, (i - j), j, n, color_order, NULL);
 #if CONFIG_NEW_MULTISYMBOL
@@ -264,15 +267,15 @@
   }
   // Copy last column to extra columns.
   if (cols < plane_block_width) {
-    for (i = 0; i < plane_block_height; ++i) {
+    for (int i = 0; i < plane_block_height; ++i) {
       memset(color_map + i * plane_block_width + cols,
              color_map[i * plane_block_width + cols - 1],
              (plane_block_width - cols));
     }
   }
 #else
-  for (i = 0; i < rows; ++i) {
-    for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+  for (int i = 0; i < rows; ++i) {
+    for (int j = (i == 0 ? 1 : 0); j < cols; ++j) {
       const int color_ctx = av1_get_palette_color_index_context(
           color_map, plane_block_width, i, j, n, color_order, NULL);
 #if CONFIG_NEW_MULTISYMBOL
@@ -292,7 +295,7 @@
   }
 #endif  // CONFIG_PALETTE_THROUGHPUT
   // Copy last row to extra rows.
-  for (i = rows; i < plane_block_height; ++i) {
+  for (int i = rows; i < plane_block_height; ++i) {
     memcpy(color_map + i * plane_block_width,
            color_map + (rows - 1) * plane_block_width, plane_block_width);
   }
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index ba40666..0e58a28 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -14,9 +14,9 @@
 
 #include "./aom_config.h"
 #if !CONFIG_PVQ || CONFIG_VAR_TX
-#include "av1/decoder/decoder.h"
 #include "av1/common/scan.h"
-#endif  // !CONFIG_PVQ
+#endif  // !CONFIG_PVQ || CONFIG_VAR_TX
+#include "av1/decoder/decoder.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index bcb95d8..8704469 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -643,10 +643,11 @@
 #if CONFIG_PALETTE
 static void pack_palette_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
                                 int num) {
-  int i;
   const TOKENEXTRA *p = *tp;
-
-  for (i = 0; i < num; ++i) {
+  write_uniform(w, n, p->token);  // The first color index.
+  ++p;
+  --num;
+  for (int i = 0; i < num; ++i) {
 #if CONFIG_NEW_MULTISYMBOL
     aom_write_symbol(w, p->token, p->palette_cdf, n);
 #else
@@ -656,7 +657,6 @@
 #endif
     ++p;
   }
-
   *tp = p;
 }
 #endif  // CONFIG_PALETTE
@@ -1493,12 +1493,14 @@
   if (mbmi->mode == DC_PRED) {
     const int n = pmi->palette_size[0];
     int palette_y_mode_ctx = 0;
-    if (above_mi)
+    if (above_mi) {
       palette_y_mode_ctx +=
           (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
-    if (left_mi)
+    }
+    if (left_mi) {
       palette_y_mode_ctx +=
           (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+    }
     aom_write(
         w, n > 0,
         av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_y_mode_ctx]);
@@ -1520,7 +1522,6 @@
         aom_write_literal(w, pmi->palette_colors[i], cm->bit_depth);
       }
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
-      write_uniform(w, n, pmi->palette_first_color_idx[0]);
     }
   }
 
@@ -1552,7 +1553,6 @@
                           cm->bit_depth);
       }
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
-      write_uniform(w, n, pmi->palette_first_color_idx[1]);
     }
   }
 }
@@ -2503,7 +2503,7 @@
       av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows,
                                &cols);
       assert(*tok < tok_end);
-      pack_palette_tokens(w, tok, palette_size_plane, rows * cols - 1);
+      pack_palette_tokens(w, tok, palette_size_plane, rows * cols);
       assert(*tok < tok_end + mbmi->skip);
     }
   }
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index dbce6a8..4148009 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5968,13 +5968,8 @@
 #if CONFIG_PALETTE
     if (bsize >= BLOCK_8X8 && !dry_run) {
       for (plane = 0; plane <= 1; ++plane) {
-        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
-          mbmi->palette_mode_info.palette_first_color_idx[plane] =
-              xd->plane[plane].color_index_map[0];
-          // TODO(huisu): this increases the use of token buffer. Needs stretch
-          // test to verify.
+        if (mbmi->palette_mode_info.palette_size[plane] > 0)
           av1_tokenize_palette_sb(cpi, td, plane, t, dry_run, bsize, rate);
-        }
       }
     }
 #endif  // CONFIG_PALETTE
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index d188d4e..bf29608 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -323,15 +323,12 @@
                              const struct ThreadData *const td, int plane,
                              TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                              int *rate) {
+  assert(plane == 0 || plane == 1);
   const MACROBLOCK *const x = &td->mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const uint8_t *const color_map = xd->plane[plane].color_index_map;
   const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const int n = pmi->palette_size[plane];
-  int i, j;
-  int this_rate = 0;
-  uint8_t color_order[PALETTE_MAX_SIZE];
 #if CONFIG_NEW_MULTISYMBOL
   aom_cdf_prob(
       *palette_cdf)[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] =
@@ -347,24 +344,37 @@
   int plane_block_width, rows, cols;
   av1_get_block_dimensions(bsize, plane, xd, &plane_block_width, NULL, &rows,
                            &cols);
-  assert(plane == 0 || plane == 1);
 
-#if CONFIG_PALETTE_THROUGHPUT
-  int k;
-  for (k = 1; k < rows + cols - 1; ++k) {
-    for (j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
-      i = k - j;
+  // The first color index does not use context or entropy.
+  (*t)->token = color_map[0];
+#if CONFIG_NEW_MULTISYMBOL
+  (*t)->palette_cdf = NULL;
 #else
-  for (i = 0; i < rows; ++i) {
-    for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+  (*t)->context_tree = NULL;
+#endif
+  (*t)->skip_eob_node = 0;
+  ++(*t);
+
+  const int n = pmi->palette_size[plane];
+  const int calc_rate = rate && dry_run == DRY_RUN_COSTCOEFFS;
+  int this_rate = 0;
+  uint8_t color_order[PALETTE_MAX_SIZE];
+#if CONFIG_PALETTE_THROUGHPUT
+  for (int k = 1; k < rows + cols - 1; ++k) {
+    for (int j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
+      int i = k - j;
+#else
+  for (int i = 0; i < rows; ++i) {
+    for (int j = (i == 0 ? 1 : 0); j < cols; ++j) {
 #endif  // CONFIG_PALETTE_THROUGHPUT
       int color_new_idx;
       const int color_ctx = av1_get_palette_color_index_context(
           color_map, plane_block_width, i, j, n, color_order, &color_new_idx);
       assert(color_new_idx >= 0 && color_new_idx < n);
-      if (dry_run == DRY_RUN_COSTCOEFFS)
+      if (calc_rate) {
         this_rate += cpi->palette_y_color_cost[n - PALETTE_MIN_SIZE][color_ctx]
                                               [color_new_idx];
+      }
       (*t)->token = color_new_idx;
 #if CONFIG_NEW_MULTISYMBOL
       (*t)->palette_cdf = palette_cdf[n - PALETTE_MIN_SIZE][color_ctx];