add a context tree structure to encoder

This patch sets up a quad-tree structure (pc_tree) for holding all of the
pick_mode_context data we use at any square block size during encoding
or picking modes.  Each node holds contexts for 2 horizontal and 2 vertical
splits, one context for none, and pointers to the 4 sub pc_tree nodes
corresponding to split.  It also records the currently chosen partitioning.

This replaces code that held an index for every level in the pick
modes array including:  sb_index, mb_index,
b_index, ab_index.

These were used as stateful indexes that pointed to the current pick mode
contexts you had at each level, stored in the following arrays:

ab4x4_context[][][],
sb8x4_context[][][], sb4x8_context[][][], sb8x8_context[][][],
sb8x16_context[][][], sb16x8_context[][][], mb_context[][],
sb32x16_context[][], sb16x32_context[][], sb32_context[],
sb32x64_context[], sb64x32_context[], sb64_context

and the partitioning that had been stored in the following:
b_partitioning, mb_partitioning, sb_partitioning, and sb64_partitioning.

Prior to this patch, before doing an encode you had to set the appropriate
index for your block size (a switch statement), update it (up to 3
lookups for the index array value), and then make your call into a recursive
function, at which point you'd have to call get_block_context, which then
had to do a switch statement based on the block size and then up to 3
lookups based upon the block size to find the context to use.

With the new code the context for the block size is passed around directly,
avoiding the extraneous switch statements and multi-dimensional array
lookups that were listed above.  At any level in the search all of the
contexts are local to the pc_tree node you are working on.

In addition, in most places the code no longer calls a sub-function that
then checks whether the block is smaller than 8x8 with an index > 0 and
returns early; instead the caller makes that check and calls the right function.



Change-Id: I06e39318269d9af2ce37961b3f95e181b57f5ed9
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 230116f..cad1241 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -79,6 +79,18 @@
   // Zbin Over Quant value
   int16_t zbin_extra;
 };
+typedef struct PC_TREE {  // Mode contexts for one square block of the tree.
+  int index;
+  PARTITION_TYPE partitioning;  // Partition type currently chosen here.
+  BLOCK_SIZE block_size;  // Square block size this node stands for.
+  PICK_MODE_CONTEXT none;  // Context used for PARTITION_NONE.
+  PICK_MODE_CONTEXT horizontal[2];  // The two halves of PARTITION_HORZ.
+  PICK_MODE_CONTEXT vertical[2];  // The two halves of PARTITION_VERT.
+  union {
+    struct PC_TREE *split[4];  // Nodes above 8x8: the four child nodes.
+    PICK_MODE_CONTEXT *leaf_split[4];  // 8x8 node: the shared leaf context.
+  };
+} PC_TREE;
 
 /* The [2] dimension is for whether we skip the EOB node (i.e. if previous
  * coefficient in this block was zero) or not. */
@@ -133,11 +145,6 @@
   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
 
-  unsigned char sb_index;   // index of 32x32 block inside the 64x64 block
-  unsigned char mb_index;   // index of 16x16 block inside the 32x32 block
-  unsigned char b_index;    // index of 8x8 block inside the 16x16 block
-  unsigned char ab_index;   // index of 4x4 block inside the 8x8 block
-
   // These define limits to motion vector components to prevent them
   // from extending outside the UMV borders
   int mv_col_min;
@@ -164,69 +171,14 @@
   // Used to store sub partition's choices.
   int_mv pred_mv[MAX_REF_FRAMES];
 
-  // TODO(jingning): Need to refactor the structure arrays that buffers the
-  // coding mode decisions of each partition type.
-  PICK_MODE_CONTEXT ab4x4_context[4][4][4];
-  PICK_MODE_CONTEXT sb8x4_context[4][4][4];
-  PICK_MODE_CONTEXT sb4x8_context[4][4][4];
-  PICK_MODE_CONTEXT sb8x8_context[4][4][4];
-  PICK_MODE_CONTEXT sb8x16_context[4][4][2];
-  PICK_MODE_CONTEXT sb16x8_context[4][4][2];
-  PICK_MODE_CONTEXT mb_context[4][4];
-  PICK_MODE_CONTEXT sb32x16_context[4][2];
-  PICK_MODE_CONTEXT sb16x32_context[4][2];
-  // when 4 MBs share coding parameters:
-  PICK_MODE_CONTEXT sb32_context[4];
-  PICK_MODE_CONTEXT sb32x64_context[2];
-  PICK_MODE_CONTEXT sb64x32_context[2];
-  PICK_MODE_CONTEXT sb64_context;
+  PICK_MODE_CONTEXT *leaf_tree;
+  PC_TREE *pc_tree;
+  PC_TREE *pc_root;
   int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
 
-  BLOCK_SIZE b_partitioning[4][4][4];
-  BLOCK_SIZE mb_partitioning[4][4];
-  BLOCK_SIZE sb_partitioning[4];
-  BLOCK_SIZE sb64_partitioning;
-
   void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
 };
 
-// TODO(jingning): the variables used here are little complicated. need further
-// refactoring on organizing the temporary buffers, when recursive
-// partition down to 4x4 block size is enabled.
-static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
-                                                   BLOCK_SIZE bsize) {
-  switch (bsize) {
-    case BLOCK_64X64:
-      return &x->sb64_context;
-    case BLOCK_64X32:
-      return &x->sb64x32_context[x->sb_index];
-    case BLOCK_32X64:
-      return &x->sb32x64_context[x->sb_index];
-    case BLOCK_32X32:
-      return &x->sb32_context[x->sb_index];
-    case BLOCK_32X16:
-      return &x->sb32x16_context[x->sb_index][x->mb_index];
-    case BLOCK_16X32:
-      return &x->sb16x32_context[x->sb_index][x->mb_index];
-    case BLOCK_16X16:
-      return &x->mb_context[x->sb_index][x->mb_index];
-    case BLOCK_16X8:
-      return &x->sb16x8_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_8X16:
-      return &x->sb8x16_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_8X8:
-      return &x->sb8x8_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_8X4:
-      return &x->sb8x4_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_4X8:
-      return &x->sb4x8_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_4X4:
-      return &x->ab4x4_context[x->sb_index][x->mb_index][x->b_index];
-    default:
-      assert(0);
-      return NULL;
-  }
-}
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c
new file mode 100644
index 0000000..659935c
--- /dev/null
+++ b/vp9/encoder/vp9_context_tree.c
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/encoder/vp9_context_tree.h"
+
+static const BLOCK_SIZE square[] = {  // By tree level, leaf (8x8) first.
+    BLOCK_8X8,
+    BLOCK_16X16,
+    BLOCK_32X32,
+    BLOCK_64X64,
+};
+
+// Allocates |ctx|'s buffers, sized for at least four 4x4 units (one 8x8).
+static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
+                               PICK_MODE_CONTEXT *ctx) {
+  const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
+  const int num_pix = num_blk << 4;  // 16 samples per 4x4 unit.
+  int i, k;
+  ctx->num_4x4_blk = num_blk;
+  // zcoeff_blk must hold as many entries as ctx->num_4x4_blk advertises.
+  CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_blk, sizeof(uint8_t)));
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    for (k = 0; k < 3; ++k) {  // Three buffer sets per plane.
+      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
+                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
+                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
+                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
+                      vpx_memalign(16, num_pix * sizeof(uint16_t)));
+      ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
+      ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
+      ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
+      ctx->eobs_pbuf[i][k]    = ctx->eobs[i][k];
+    }
+  }
+}
+
+static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
+  int plane, slot;  // Every pointer is nulled right after it is freed.
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    for (slot = 0; slot < 3; ++slot) {
+      vpx_free(ctx->coeff[plane][slot]);
+      ctx->coeff[plane][slot] = NULL;
+      vpx_free(ctx->qcoeff[plane][slot]);
+      ctx->qcoeff[plane][slot] = NULL;
+      vpx_free(ctx->dqcoeff[plane][slot]);
+      ctx->dqcoeff[plane][slot] = NULL;
+      vpx_free(ctx->eobs[plane][slot]);
+      ctx->eobs[plane][slot] = NULL;
+    }
+  }
+  vpx_free(ctx->zcoeff_blk);
+  ctx->zcoeff_blk = NULL;
+}
+// Frees the buffers of all five mode contexts embedded in |this_pc|.
+static void free_tree_contexts(PC_TREE *this_pc) {
+  int i;
+  free_mode_context(&this_pc->none);
+  for (i = 0; i < 2; ++i) free_mode_context(&this_pc->horizontal[i]);
+  for (i = 0; i < 2; ++i) free_mode_context(&this_pc->vertical[i]);
+}
+static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *this_pc,
+                                int num_4x4_blk) {
+  const int half_blk = num_4x4_blk / 2;  // Request size for each half.
+  int i;
+  alloc_mode_context(cm, num_4x4_blk, &this_pc->none);
+  // TODO(Jbb): the [1] contexts go unused for 4x8 and 8x4; find a better way.
+  for (i = 0; i < 2; ++i) {
+    alloc_mode_context(cm, half_blk, &this_pc->horizontal[i]);
+    alloc_mode_context(cm, half_blk, &this_pc->vertical[i]);
+  }
+}
+
+// Sets up a tree of contexts such that each square partition level has
+// contexts for none, horizontal, vertical, and split, along with a
+// block_size value and the selected partitioning, which represents the
+// state of our search.
+void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) {
+  int i, j;
+  const int leaf_nodes = 64;  // Number of 8x8 nodes.
+  const int tree_nodes = 64 + 16 + 4 + 1;  // 8x8 + 16x16 + 32x32 + 64x64.
+  int pc_tree_index = 0;
+  PC_TREE *this_pc;
+  PICK_MODE_CONTEXT *this_leaf;
+  int square_index = 1;  // Level in square[]; 0 is the leaf level.
+  int nodes;
+  // NOTE(review): only the arrays are freed here, not the buffers in them.
+  vpx_free(x->leaf_tree);
+  CHECK_MEM_ERROR(cm, x->leaf_tree, vpx_calloc(leaf_nodes,
+                                               sizeof(PICK_MODE_CONTEXT)));
+  vpx_free(x->pc_tree);
+  CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(PC_TREE)));
+
+  this_pc = &x->pc_tree[0];  // Next node to hand out as a child.
+  this_leaf = &x->leaf_tree[0];  // Next leaf context to hand out.
+
+  // Blocks smaller than 8x8 that lie in the same 8x8 block share one
+  // context, so only one leaf context is allocated for each 8x8 block.
+  for (i = 0; i < leaf_nodes; ++i)
+    alloc_mode_context(cm, 1, &x->leaf_tree[i]);
+
+  // Sets up all the leaf (8x8) nodes in the tree.
+  for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
+    x->pc_tree[pc_tree_index].block_size = square[0];
+    alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index], 4);
+    x->pc_tree[pc_tree_index].leaf_split[0] = this_leaf++;
+    for (j = 1; j < 4; j++) {  // All four entries share the same context.
+      x->pc_tree[pc_tree_index].leaf_split[j] =
+          x->pc_tree[pc_tree_index].leaf_split[0];
+    }
+  }
+
+  // Each remaining node has 4 child nodes; fill each block_size level of
+  // the tree from the leaves up to the root.
+  for (nodes = 16; nodes > 0; nodes >>= 2, ++square_index) {
+    for (i = 0; i < nodes; ++pc_tree_index,  ++i) {
+      alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index],
+                          4 << (2 * square_index));  // 4x4 units per block.
+      x->pc_tree[pc_tree_index].block_size = square[square_index];
+      for (j = 0; j < 4; j++) {
+        x->pc_tree[pc_tree_index].split[j] = this_pc++;
+      }
+    }
+  }
+  x->pc_root = &x->pc_tree[tree_nodes-1];  // The root is the last node filled.
+  x->pc_root[0].none.best_mode_index = 2;  // NOTE(review): why 2? Confirm.
+}
+
+// Frees what vp9_setup_pc_tree() allocated; NULL arrays are skipped.
+void vp9_free_pc_tree(MACROBLOCK *m) {
+  const int tree_nodes = 64 + 16 + 4 + 1;
+  int i;
+  if (m->leaf_tree != NULL) {  // Buffers of the 64 shared leaf contexts.
+    for (i = 0; i < 64; ++i) free_mode_context(&m->leaf_tree[i]);
+  }
+  if (m->pc_tree != NULL) {  // Buffers embedded in every tree node.
+    for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&m->pc_tree[i]);
+  }
+  vpx_free(m->pc_tree);
+  vpx_free(m->leaf_tree);
+  // pc_root pointed into pc_tree, so it is cleared along with the arrays.
+  m->pc_tree = NULL;
+  m->pc_root = NULL;
+  m->leaf_tree = NULL;
+}
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
new file mode 100644
index 0000000..40d7394
--- /dev/null
+++ b/vp9/encoder/vp9_context_tree.h
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_
+#define VP9_ENCODER_VP9_CONTEXT_TREE_H_
+
+#include "vp9/encoder/vp9_onyx_int.h"
+
+void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x);
+void vp9_free_pc_tree(MACROBLOCK *x);
+
+#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 0498a3f..dea7848 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -48,33 +48,9 @@
 #define SPLIT_MV_ZBIN_BOOST  0
 #define INTRA_ZBIN_BOOST     0
 
-static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) {
-  switch (subsize) {
-    case BLOCK_64X64:
-    case BLOCK_64X32:
-    case BLOCK_32X64:
-    case BLOCK_32X32:
-      return &x->sb_index;
-    case BLOCK_32X16:
-    case BLOCK_16X32:
-    case BLOCK_16X16:
-      return &x->mb_index;
-    case BLOCK_16X8:
-    case BLOCK_8X16:
-    case BLOCK_8X8:
-      return &x->b_index;
-    case BLOCK_8X4:
-    case BLOCK_4X8:
-    case BLOCK_4X4:
-      return &x->ab_index;
-    default:
-      assert(0);
-      return NULL;
-  }
-}
-
 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize);
+                              int mi_row, int mi_col, BLOCK_SIZE bsize,
+                              PICK_MODE_CONTEXT *ctx);
 
 // Motion vector component magnitude threshold for defining fast motion.
 #define FAST_MOTION_MV_THRESH 24
@@ -743,7 +719,7 @@
                              int mi_row, int mi_col,
                              int *totalrate, int64_t *totaldist,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                             int64_t best_rd) {
+                             int64_t best_rd, int block) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -760,10 +736,13 @@
   // Use the lower precision, but faster, 32x32 fdct for mode selection.
   x->use_lp32x32fdct = 1;
 
+  // TODO(JBB): In most other places, instead of calling the function and
+  // having it check whether this is the first block of the 8x8, the check
+  // lives in the calling function.  Do that here.
   if (bsize < BLOCK_8X8) {
     // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
     // there is nothing to be done.
-    if (x->ab_index != 0) {
+    if (block != 0) {
       *totalrate = 0;
       *totaldist = 0;
       return;
@@ -886,22 +865,6 @@
   }
 }
 
-static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) {
-  switch (bsize) {
-    case BLOCK_64X64:
-      return &x->sb64_partitioning;
-    case BLOCK_32X32:
-      return &x->sb_partitioning[x->sb_index];
-    case BLOCK_16X16:
-      return &x->mb_partitioning[x->sb_index][x->mb_index];
-    case BLOCK_8X8:
-      return &x->b_partitioning[x->sb_index][x->mb_index][x->b_index];
-    default:
-      assert(0);
-      return NULL;
-  }
-}
-
 static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
                             ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                             ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
@@ -967,19 +930,12 @@
 
 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
                      TOKENEXTRA **tp, int mi_row, int mi_col,
-                     int output_enabled, BLOCK_SIZE bsize) {
-  MACROBLOCK *const x = &cpi->mb;
+                     int output_enabled, BLOCK_SIZE bsize,
+                     PICK_MODE_CONTEXT *ctx) {
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index > 0)
-      return;
-  }
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
-  update_state(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize,
-               output_enabled);
-  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
+  update_state(cpi, ctx, mi_row, mi_col, bsize, output_enabled);
+  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
 
   if (output_enabled) {
     update_stats(cpi);
@@ -991,7 +947,8 @@
 
 static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
                       TOKENEXTRA **tp, int mi_row, int mi_col,
-                      int output_enabled, BLOCK_SIZE bsize) {
+                      int output_enabled, BLOCK_SIZE bsize,
+                      PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -999,61 +956,58 @@
   const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
   int ctx;
   PARTITION_TYPE partition;
-  BLOCK_SIZE subsize;
+  BLOCK_SIZE subsize = bsize;
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
   if (bsize >= BLOCK_8X8) {
     ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-    subsize = *get_sb_partitioning(x, bsize);
+    subsize = get_subsize(bsize, pc_tree->partitioning);
   } else {
     ctx = 0;
     subsize = BLOCK_4X4;
   }
 
   partition = partition_lookup[bsl][subsize];
+  if (output_enabled && bsize != BLOCK_4X4)
+    cm->counts.partition[ctx][partition]++;
 
   switch (partition) {
     case PARTITION_NONE:
-      if (output_enabled && bsize >= BLOCK_8X8)
-        cm->counts.partition[ctx][PARTITION_NONE]++;
-      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+               &pc_tree->none);
       break;
     case PARTITION_VERT:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_VERT]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
-        encode_b(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize);
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+               &pc_tree->vertical[0]);
+      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
+        encode_b(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize,
+                 &pc_tree->vertical[1]);
       }
       break;
     case PARTITION_HORZ:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_HORZ]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
-        encode_b(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize);
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+               &pc_tree->horizontal[0]);
+      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
+        encode_b(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize,
+                 &pc_tree->horizontal[1]);
       }
       break;
     case PARTITION_SPLIT:
-      subsize = get_subsize(bsize, PARTITION_SPLIT);
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_SPLIT]++;
-
-      *get_sb_index(x, subsize) = 0;
-      encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 1;
-      encode_sb(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 2;
-      encode_sb(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 3;
-      encode_sb(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
-                subsize);
+      if (bsize == BLOCK_8X8) {
+        encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                 pc_tree->leaf_split[0]);
+      } else {
+        encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  pc_tree->split[0]);
+        encode_sb(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize,
+                  pc_tree->split[1]);
+        encode_sb(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize,
+                  pc_tree->split[2]);
+        encode_sb(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+                  subsize, pc_tree->split[3]);
+      }
       break;
     default:
       assert("Invalid partition type.");
@@ -1404,20 +1358,14 @@
 
 static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
                         TOKENEXTRA **tp, int mi_row, int mi_col,
-                        int output_enabled, BLOCK_SIZE bsize) {
-  MACROBLOCK *const x = &cpi->mb;
+                     int output_enabled, BLOCK_SIZE bsize,
+                     PICK_MODE_CONTEXT *ctx) {
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index > 0)
-      return;
-  }
 
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
-  update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize);
+  update_state_rt(cpi, ctx, mi_row, mi_col, bsize);
 
-  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
+  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
   update_stats(cpi);
 
   (*tp)->token = EOSB_TOKEN;
@@ -1426,7 +1374,8 @@
 
 static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
                          TOKENEXTRA **tp, int mi_row, int mi_col,
-                         int output_enabled, BLOCK_SIZE bsize) {
+                         int output_enabled, BLOCK_SIZE bsize,
+                         PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1451,51 +1400,40 @@
   }
 
   partition = partition_lookup[bsl][subsize];
+  if (output_enabled && bsize != BLOCK_4X4)
+    cm->counts.partition[ctx][partition]++;
 
   switch (partition) {
     case PARTITION_NONE:
-      if (output_enabled && bsize >= BLOCK_8X8)
-        cm->counts.partition[ctx][PARTITION_NONE]++;
-      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  &pc_tree->none);
       break;
     case PARTITION_VERT:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_VERT]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  &pc_tree->vertical[0]);
+      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
         encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
-                    subsize);
+                    subsize, &pc_tree->vertical[1]);
       }
       break;
     case PARTITION_HORZ:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_HORZ]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  &pc_tree->horizontal[0]);
+      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
         encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
-                    subsize);
+                    subsize, &pc_tree->horizontal[1]);
       }
       break;
     case PARTITION_SPLIT:
       subsize = get_subsize(bsize, PARTITION_SPLIT);
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_SPLIT]++;
-
-      *get_sb_index(x, subsize) = 0;
-      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 1;
+      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                   pc_tree->split[0]);
       encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
-                   subsize);
-      *get_sb_index(x, subsize) = 2;
+                   subsize, pc_tree->split[1]);
       encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
-                   subsize);
-      *get_sb_index(x, subsize) = 3;
+                   subsize, pc_tree->split[2]);
       encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
-                   subsize);
+                   subsize, pc_tree->split[3]);
       break;
     default:
       assert("Invalid partition type.");
@@ -1510,7 +1448,8 @@
                              MODE_INFO **mi_8x8,
                              TOKENEXTRA **tp, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, int *rate, int64_t *dist,
-                             int do_recon) {
+                             int do_recon, PC_TREE *pc_tree,
+                             int block) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1536,6 +1475,7 @@
   int splits_below = 0;
   BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
   int do_partition_search = 1;
+  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
@@ -1546,17 +1486,7 @@
   partition = partition_lookup[bsl][bs_type];
   subsize = get_subsize(bsize, partition);
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index != 0) {
-      *rate = 0;
-      *dist = 0;
-      return;
-    }
-  } else {
-    *(get_sb_partitioning(x, bsize)) = subsize;
-  }
+  pc_tree->partitioning = partition;
   save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 
   if (bsize == BLOCK_16X16) {
@@ -1570,7 +1500,7 @@
     do_partition_search = 0;
     if (mi_row + (mi_step >> 1) < cm->mi_rows &&
         mi_col + (mi_step >> 1) < cm->mi_cols) {
-      *(get_sb_partitioning(x, bsize)) = bsize;
+      pc_tree->partitioning = PARTITION_NONE;
       bs_type = mi_8x8[0]->mbmi.sb_type = bsize;
       subsize = bsize;
       partition = PARTITION_NONE;
@@ -1597,9 +1527,9 @@
     if (partition != PARTITION_NONE && !splits_below &&
         mi_row + (mi_step >> 1) < cm->mi_rows &&
         mi_col + (mi_step >> 1) < cm->mi_cols) {
-      *(get_sb_partitioning(x, bsize)) = bsize;
+      pc_tree->partitioning = PARTITION_NONE;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
-                       get_block_context(x, bsize), INT64_MAX);
+                       ctx, INT64_MAX, 0);
 
       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
 
@@ -1610,31 +1540,28 @@
 
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
       mi_8x8[0]->mbmi.sb_type = bs_type;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = partition;
     }
   }
 
   switch (partition) {
     case PARTITION_NONE:
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
-                       &last_part_dist, bsize,
-                       get_block_context(x, bsize), INT64_MAX);
+                       &last_part_dist, bsize, ctx, INT64_MAX, 0);
       break;
     case PARTITION_HORZ:
-      *get_sb_index(x, subsize) = 0;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
-                       &last_part_dist, subsize,
-                       get_block_context(x, subsize), INT64_MAX);
+                       &last_part_dist, subsize, &pc_tree->horizontal[0],
+                       INT64_MAX, 0);
       if (last_part_rate != INT_MAX &&
           bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
         int rt = 0;
         int64_t dt = 0;
-        update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                     subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-        *get_sb_index(x, subsize) = 1;
+        PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+        update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
+        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
         rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt,
-                         subsize, get_block_context(x, subsize), INT64_MAX);
+                         subsize, &pc_tree->horizontal[1], INT64_MAX, 1);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
           last_part_dist = INT64_MAX;
@@ -1646,20 +1573,19 @@
       }
       break;
     case PARTITION_VERT:
-      *get_sb_index(x, subsize) = 0;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
-                       &last_part_dist, subsize,
-                       get_block_context(x, subsize), INT64_MAX);
+                       &last_part_dist, subsize, &pc_tree->vertical[0],
+                       INT64_MAX, 0);
       if (last_part_rate != INT_MAX &&
           bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
         int rt = 0;
         int64_t dt = 0;
-        update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                     subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-        *get_sb_index(x, subsize) = 1;
+        PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+        update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
+        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
         rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt,
-                         subsize, get_block_context(x, subsize), INT64_MAX);
+                         subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
+                         INT64_MAX, 1);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
           last_part_dist = INT64_MAX;
@@ -1670,7 +1596,12 @@
       }
       break;
     case PARTITION_SPLIT:
-      // Split partition.
+      if (bsize == BLOCK_8X8) {
+        rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
+                         &last_part_dist, subsize, pc_tree->leaf_split[0],
+                         INT64_MAX, 0);
+        break;
+      }
       last_part_rate = 0;
       last_part_dist = 0;
       for (i = 0; i < 4; i++) {
@@ -1683,11 +1614,9 @@
         if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
           continue;
 
-        *get_sb_index(x, subsize) = i;
-
         rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
                          mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
-                         i != 3);
+                         i != 3, pc_tree->split[i], i);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
           last_part_dist = INT64_MAX;
@@ -1719,6 +1648,7 @@
     chosen_rate = 0;
     chosen_dist = 0;
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+    pc_tree->partitioning = PARTITION_SPLIT;
 
     // Split partition.
     for (i = 0; i < 4; i++) {
@@ -1732,15 +1662,11 @@
       if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
         continue;
 
-      *get_sb_index(x, split_subsize) = i;
-      *get_sb_partitioning(x, bsize) = split_subsize;
-      *get_sb_partitioning(x, split_subsize) = split_subsize;
-
       save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
-
+      pc_tree->split[i]->partitioning = PARTITION_NONE;
       rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
-                       split_subsize, get_block_context(x, split_subsize),
-                       INT64_MAX);
+                       split_subsize, &pc_tree->split[i]->none,
+                       INT64_MAX, i);
 
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 
@@ -1755,7 +1681,7 @@
 
       if (i != 3)
         encode_sb(cpi, tile, tp,  mi_row + y_idx, mi_col + x_idx, 0,
-                  split_subsize);
+                  split_subsize, pc_tree->split[i]);
 
       pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                    split_subsize);
@@ -1768,19 +1694,19 @@
     }
   }
 
-  // If last_part is better set the partitioning to that...
+  // If last_part is better, set the partitioning to that.
   if (last_part_rd < chosen_rd) {
     mi_8x8[0]->mbmi.sb_type = bsize;
     if (bsize >= BLOCK_8X8)
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = partition;
     chosen_rate = last_part_rate;
     chosen_dist = last_part_dist;
     chosen_rd = last_part_rd;
   }
-  // If none was better set the partitioning to that...
+  // If none was better, set the partitioning to that.
   if (none_rd < chosen_rd) {
     if (bsize >= BLOCK_8X8)
-      *(get_sb_partitioning(x, bsize)) = bsize;
+      pc_tree->partitioning = PARTITION_NONE;
     chosen_rate = none_rate;
     chosen_dist = none_dist;
   }
@@ -1806,8 +1732,8 @@
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               chosen_rate, chosen_dist);
-
-    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
+    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize,
+              pc_tree);
   }
 
   *rate = chosen_rate;
@@ -1951,7 +1877,8 @@
 static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                               TOKENEXTRA **tp, int mi_row,
                               int mi_col, BLOCK_SIZE bsize, int *rate,
-                              int64_t *dist, int do_recon, int64_t best_rd) {
+                              int64_t *dist, int do_recon, int64_t best_rd,
+                              PC_TREE *pc_tree, int block) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1959,7 +1886,7 @@
   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   PARTITION_CONTEXT sl[8], sa[8];
   TOKENEXTRA *tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
+  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   int i, pl;
   BLOCK_SIZE subsize;
   int this_rate, sum_rate = 0, best_rate = INT_MAX;
@@ -1980,15 +1907,6 @@
                                bsize >= BLOCK_8X8;
   (void) *tp_orig;
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index != 0) {
-      *rate = 0;
-      *dist = 0;
-      return;
-    }
-  }
   assert(num_8x8_blocks_wide_lookup[bsize] ==
              num_8x8_blocks_high_lookup[bsize]);
 
@@ -1998,7 +1916,6 @@
   } else {
     x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
   }
-
   // Determine partition types in search according to the speed features.
   // The threshold set here has to be of square block size.
   if (cpi->sf.auto_min_max_partition_size) {
@@ -2035,7 +1952,7 @@
   // PARTITION_NONE
   if (partition_none_allowed) {
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
-                     ctx, best_rd);
+                     ctx, best_rd, 0);
     if (this_rate != INT_MAX) {
       if (bsize >= BLOCK_8X8) {
         pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -2050,7 +1967,7 @@
         best_dist = this_dist;
         best_rd = sum_rd;
         if (bsize >= BLOCK_8X8)
-          *(get_sb_partitioning(x, bsize)) = bsize;
+          pc_tree->partitioning = PARTITION_NONE;
 
         // Adjust threshold according to partition size.
         stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
@@ -2082,29 +1999,47 @@
   // the starting point of motion search in the following partition type check.
   if (do_split) {
     subsize = get_subsize(bsize, PARTITION_SPLIT);
-    for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+    if (bsize == BLOCK_8X8) {
+      i = 4;
+      if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
+        pc_tree->leaf_split[0]->pred_interp_filter =
+            ctx->mic.mbmi.interp_filter;
+
+      rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+                       pc_tree->leaf_split[0], best_rd, 0);
+
+      if (sum_rate == INT_MAX) {
+        sum_rd = INT64_MAX;
+      } else {
+        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+        if (sum_rd < best_rd) {
+          update_state(cpi, ctx, mi_row, mi_col, bsize, 0);
+          encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
+          update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+        }
+      }
+    } else {
+      for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
       const int x_idx = (i & 1) * mi_step;
       const int y_idx = (i >> 1) * mi_step;
 
-      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
-        continue;
+        if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
+          continue;
 
-      *get_sb_index(x, subsize) = i;
-      if (cpi->sf.adaptive_motion_search)
-        load_pred_mv(x, ctx);
-      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
-          partition_none_allowed)
-        get_block_context(x, subsize)->pred_interp_filter =
-            ctx->mic.mbmi.interp_filter;
-      rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize,
-                        &this_rate, &this_dist, i != 3, best_rd - sum_rd);
+        if (cpi->sf.adaptive_motion_search)
+          load_pred_mv(x, ctx);
 
-      if (this_rate == INT_MAX) {
-        sum_rd = INT64_MAX;
-      } else {
-        sum_rate += this_rate;
-        sum_dist += this_dist;
-        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+        rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
+                          subsize, &this_rate, &this_dist, i != 3,
+                          best_rd - sum_rd, pc_tree->split[i], i);
+
+        if (this_rate == INT_MAX) {
+          sum_rd = INT64_MAX;
+        } else {
+          sum_rate += this_rate;
+          sum_dist += this_dist;
+          sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+        }
       }
     }
     if (sum_rd < best_rd && i == 4) {
@@ -2115,7 +2050,7 @@
         best_rate = sum_rate;
         best_dist = sum_dist;
         best_rd = sum_rd;
-        *(get_sb_partitioning(x, bsize)) = subsize;
+        pc_tree->partitioning = PARTITION_SPLIT;
       }
     } else {
       // skip rectangular partition test when larger block size
@@ -2125,36 +2060,33 @@
     }
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
-
   // PARTITION_HORZ
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      get_block_context(x, subsize)->pred_interp_filter =
+      pc_tree->horizontal[0].pred_interp_filter =
           ctx->mic.mbmi.interp_filter;
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
-                     get_block_context(x, subsize), best_rd);
+                     &pc_tree->horizontal[0], best_rd, 0);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
 
     if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) {
-      update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                   subsize, 0);
-      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+      PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+      update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
+      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
 
-      *get_sb_index(x, subsize) = 1;
       if (cpi->sf.adaptive_motion_search)
         load_pred_mv(x, ctx);
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        get_block_context(x, subsize)->pred_interp_filter =
+        pc_tree->horizontal[1].pred_interp_filter =
             ctx->mic.mbmi.interp_filter;
       rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate,
-                       &this_dist, subsize, get_block_context(x, subsize),
-                       best_rd - sum_rd);
+                       &this_dist, subsize, &pc_tree->horizontal[1],
+                       best_rd - sum_rd, 1);
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
       } else {
@@ -2171,41 +2103,39 @@
         best_rd = sum_rd;
         best_rate = sum_rate;
         best_dist = sum_dist;
-        *(get_sb_partitioning(x, bsize)) = subsize;
+        pc_tree->partitioning = PARTITION_HORZ;
       }
     }
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
-
   // PARTITION_VERT
   if (partition_vert_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_VERT);
 
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      get_block_context(x, subsize)->pred_interp_filter =
+      pc_tree->vertical[0].pred_interp_filter =
           ctx->mic.mbmi.interp_filter;
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
-                     get_block_context(x, subsize), best_rd);
+                     &pc_tree->vertical[0], best_rd, 0);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
     if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) {
-      update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                   subsize, 0);
-      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+      update_state(cpi, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
+      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize,
+                        &pc_tree->vertical[0]);
 
-      *get_sb_index(x, subsize) = 1;
       if (cpi->sf.adaptive_motion_search)
         load_pred_mv(x, ctx);
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        get_block_context(x, subsize)->pred_interp_filter =
+        pc_tree->vertical[bsize > BLOCK_8X8].pred_interp_filter =
             ctx->mic.mbmi.interp_filter;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate,
-                       &this_dist, subsize, get_block_context(x, subsize),
-                       best_rd - sum_rd);
+                       &this_dist, subsize,
+                       &pc_tree->vertical[bsize > BLOCK_8X8], best_rd - sum_rd,
+                       1);
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
       } else {
@@ -2222,12 +2152,11 @@
         best_rate = sum_rate;
         best_dist = sum_dist;
         best_rd = sum_rd;
-        *(get_sb_partitioning(x, bsize)) = subsize;
+        pc_tree->partitioning = PARTITION_VERT;
       }
     }
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
-
   // TODO(jbb): This code added so that we avoid static analysis
   // warning related to the fact that best_rd isn't used after this
   // point.  This code should be refactored so that the duplicate
@@ -2246,12 +2175,14 @@
       vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
                                     best_rate);
     }
-
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               best_rate, best_dist);
+    if (bsize == BLOCK_4X4)
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, ctx);
+    else
+      encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
 
-    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
   }
   if (bsize == BLOCK_64X64) {
     assert(tp_orig < *tp);
@@ -2279,18 +2210,18 @@
     int dummy_rate;
     int64_t dummy_dist;
 
-    BLOCK_SIZE i;
+    int i;
     MACROBLOCK *x = &cpi->mb;
 
     if (sf->adaptive_pred_interp_filter) {
-      for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) {
-        const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
-        const int num_4x4_h = num_4x4_blocks_high_lookup[i];
-        const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
-        for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index)
-          for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index)
-            for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index)
-              get_block_context(x, i)->pred_interp_filter = SWITCHABLE;
+      for (i = 0; i < 64; ++i)
+        x->leaf_tree[i].pred_interp_filter = SWITCHABLE;
+
+      for (i = 0; i < 64; ++i) {
+        x->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
+        x->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
+        x->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
+        x->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
       }
     }
 
@@ -2310,18 +2241,18 @@
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
                                sf->always_this_block_size);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1);
+                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
       } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
         BLOCK_SIZE bsize;
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1);
+                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
       } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
         choose_partitioning(cpi, tile, mi_row, mi_col);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1);
+                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
       } else {
         if ((cm->current_video_frame
             % sf->last_partitioning_redo_frequency) == 0
@@ -2340,7 +2271,8 @@
                                     &sf->max_partition_size);
           }
           rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                            &dummy_rate, &dummy_dist, 1, INT64_MAX);
+                            &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root,
+                            0);
         } else {
           if (sf->constrain_copy_partition &&
               sb_has_motion(cm, prev_mi_8x8))
@@ -2349,7 +2281,7 @@
           else
             copy_partitioning(cm, mi_8x8, prev_mi_8x8);
           rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                           &dummy_rate, &dummy_dist, 1);
+                           &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
         }
       }
     } else {
@@ -2361,7 +2293,7 @@
                                 &sf->max_partition_size);
       }
       rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                        &dummy_rate, &dummy_dist, 1, INT64_MAX);
+                        &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root, 0);
     }
   }
 }
@@ -2523,10 +2455,11 @@
 
 static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
                               int mi_row, int mi_col,
-                              BLOCK_SIZE bsize, BLOCK_SIZE subsize) {
+                              BLOCK_SIZE bsize, BLOCK_SIZE subsize,
+                              PC_TREE *pc_tree) {
   MACROBLOCKD *xd = &x->e_mbd;
   int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
-  PARTITION_TYPE partition = partition_lookup[bsl][subsize];
+  PARTITION_TYPE partition = pc_tree->partitioning;
 
   assert(bsize >= BLOCK_8X8);
 
@@ -2536,48 +2469,42 @@
   switch (partition) {
     case PARTITION_NONE:
       set_modeinfo_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+      *(xd->mi[0]) = pc_tree->none.mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
       break;
     case PARTITION_VERT:
-      *get_sb_index(x, subsize) = 0;
       set_modeinfo_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+      *(xd->mi[0]) = pc_tree->vertical[0].mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
 
       if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
         set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs);
-        *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+        *(xd->mi[0]) = pc_tree->vertical[1].mic;
         duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize);
       }
       break;
     case PARTITION_HORZ:
-      *get_sb_index(x, subsize) = 0;
       set_modeinfo_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+      *(xd->mi[0]) = pc_tree->horizontal[0].mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
       if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
         set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col);
-        *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+        *(xd->mi[0]) = pc_tree->horizontal[1].mic;
         duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize);
       }
       break;
-    case PARTITION_SPLIT:
-      *get_sb_index(x, subsize) = 0;
+    case PARTITION_SPLIT: {
+      BLOCK_SIZE subsubsize = get_subsize(subsize, PARTITION_SPLIT);
       fill_mode_info_sb(cm, x, mi_row, mi_col, subsize,
-                        *(get_sb_partitioning(x, subsize)));
-      *get_sb_index(x, subsize) = 1;
+                        subsubsize, pc_tree->split[0]);
       fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
-                        *(get_sb_partitioning(x, subsize)));
-      *get_sb_index(x, subsize) = 2;
+                        subsubsize, pc_tree->split[1]);
       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
-                        *(get_sb_partitioning(x, subsize)));
-      *get_sb_index(x, subsize) = 3;
+                        subsubsize, pc_tree->split[2]);
       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
-                        *(get_sb_partitioning(x, subsize)));
+                        subsubsize, pc_tree->split[3]);
       break;
+    }
     default:
       break;
   }
@@ -2586,15 +2513,16 @@
 static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                                  TOKENEXTRA **tp, int mi_row,
                                  int mi_col, BLOCK_SIZE bsize, int *rate,
-                                 int64_t *dist, int do_recon, int64_t best_rd) {
+                                 int64_t *dist, int do_recon, int64_t best_rd,
+                                 PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
   TOKENEXTRA *tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
+  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   int i;
-  BLOCK_SIZE subsize;
+  BLOCK_SIZE subsize = bsize;
   int this_rate, sum_rate = 0, best_rate = INT_MAX;
   int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
   int64_t sum_rd = 0;
@@ -2613,16 +2541,6 @@
                                bsize >= BLOCK_8X8;
   (void) *tp_orig;
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index != 0) {
-      *rate = 0;
-      *dist = 0;
-      return;
-    }
-  }
-
   assert(num_8x8_blocks_wide_lookup[bsize] ==
              num_8x8_blocks_high_lookup[bsize]);
 
@@ -2667,7 +2585,7 @@
         best_dist = this_dist;
         best_rd = sum_rd;
         if (bsize >= BLOCK_8X8)
-          *(get_sb_partitioning(x, bsize)) = bsize;
+          pc_tree->partitioning = PARTITION_NONE;
 
         // Adjust threshold according to partition size.
         stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
@@ -2704,12 +2622,11 @@
       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
         continue;
 
-      *get_sb_index(x, subsize) = i;
       load_pred_mv(x, ctx);
 
       nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
                            subsize, &this_rate, &this_dist, 0,
-                           best_rd - sum_rd);
+                           best_rd - sum_rd, pc_tree->split[i]);
 
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
@@ -2724,7 +2641,7 @@
       best_rate = sum_rate;
       best_dist = sum_dist;
       best_rd = sum_rd;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = PARTITION_SPLIT;
     } else {
       // skip rectangular partition test when larger block size
       // gives better rd cost
@@ -2736,26 +2653,22 @@
   // PARTITION_HORZ
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
                         &this_rate, &this_dist, subsize);
 
-    get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+    pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
 
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
 
     if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
-      *get_sb_index(x, subsize) = 1;
-
       load_pred_mv(x, ctx);
-
       nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col,
                           &this_rate, &this_dist, subsize);
 
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
 
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
@@ -2771,7 +2684,7 @@
       best_rd = sum_rd;
       best_rate = sum_rate;
       best_dist = sum_dist;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = PARTITION_HORZ;
     }
   }
 
@@ -2779,24 +2692,18 @@
   if (partition_vert_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_VERT);
 
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
                         &this_rate, &this_dist, subsize);
-    get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+    pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
     if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
-      *get_sb_index(x, subsize) = 1;
-
       load_pred_mv(x, ctx);
-
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms,
                           &this_rate, &this_dist, subsize);
-
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
-
+      pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
       } else {
@@ -2811,9 +2718,13 @@
       best_rate = sum_rate;
       best_dist = sum_dist;
       best_rd = sum_rd;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = PARTITION_VERT;
     }
   }
+  // TODO(JBB): The following line is here just to avoid a static analysis
+  // warning that occurs because best_rd is never read again after this
+  // point despite being set above.  Refactor the code to avoid this.
+  (void) best_rd;
 
   *rate = best_rate;
   *dist = best_dist;
@@ -2822,8 +2733,9 @@
     return;
 
   // update mode info array
-  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize,
-                    *(get_sb_partitioning(x, bsize)));
+  subsize = get_subsize(bsize, pc_tree->partitioning);
+  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, subsize,
+                    pc_tree);
 
   if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) {
     int output_enabled = (bsize == BLOCK_64X64);
@@ -2840,7 +2752,7 @@
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               best_rate, best_dist);
 
-    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
+    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
   }
 
   if (bsize == BLOCK_64X64) {
@@ -2858,7 +2770,8 @@
                                 TOKENEXTRA **tp,
                                 int mi_row, int mi_col,
                                 BLOCK_SIZE bsize, int output_enabled,
-                                int *totrate, int64_t *totdist) {
+                                int *totrate, int64_t *totdist,
+                                PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2878,17 +2791,15 @@
   switch (partition) {
     case PARTITION_NONE:
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
       break;
     case PARTITION_VERT:
-      *get_sb_index(x, subsize) = 0;
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
       if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
         nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
                             &rate, &dist, subsize);
-        get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+        pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
           *totrate += rate;
@@ -2897,14 +2808,12 @@
       }
       break;
     case PARTITION_HORZ:
-      *get_sb_index(x, subsize) = 0;
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
       if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
         nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
                             &rate, &dist, subsize);
-        get_block_context(x, subsize)->mic.mbmi = mi_8x8[0]->mbmi;
+        pc_tree->horizontal[1].mic.mbmi = mi_8x8[0]->mbmi;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
           *totrate += rate;
@@ -2914,31 +2823,28 @@
       break;
     case PARTITION_SPLIT:
       subsize = get_subsize(bsize, PARTITION_SPLIT);
-      *get_sb_index(x, subsize) = 0;
       nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
-                          subsize, output_enabled, totrate, totdist);
-      *get_sb_index(x, subsize) = 1;
+                          subsize, output_enabled, totrate, totdist,
+                          pc_tree->split[0]);
       nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp,
                           mi_row, mi_col + hbs, subsize, output_enabled,
-                          &rate, &dist);
+                          &rate, &dist, pc_tree->split[1]);
       if (rate != INT_MAX && dist != INT64_MAX &&
           *totrate != INT_MAX && *totdist != INT64_MAX) {
         *totrate += rate;
         *totdist += dist;
       }
-      *get_sb_index(x, subsize) = 2;
       nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp,
                           mi_row + hbs, mi_col, subsize, output_enabled,
-                          &rate, &dist);
+                          &rate, &dist, pc_tree->split[2]);
       if (rate != INT_MAX && dist != INT64_MAX &&
           *totrate != INT_MAX && *totdist != INT64_MAX) {
         *totrate += rate;
         *totdist += dist;
       }
-      *get_sb_index(x, subsize) = 3;
       nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp,
                           mi_row + hbs, mi_col + hbs, subsize, output_enabled,
-                          &rate, &dist);
+                          &rate, &dist, pc_tree->split[3]);
       if (rate != INT_MAX && dist != INT64_MAX &&
           *totrate != INT_MAX && *totdist != INT64_MAX) {
         *totrate += rate;
@@ -2953,7 +2859,7 @@
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               *totrate, *totdist);
-    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize);
+    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize, pc_tree);
   }
 }
 
@@ -2970,6 +2876,7 @@
   // Code each SB in the row
   for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
        mi_col += MI_BLOCK_SIZE) {
+    MACROBLOCK *x = &cpi->mb;
     int dummy_rate = 0;
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
@@ -2985,12 +2892,12 @@
       case VAR_BASED_PARTITION:
         choose_partitioning(cpi, tile, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist);
+                            1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case SOURCE_VAR_BASED_PARTITION:
         set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist);
+                            1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case VAR_BASED_FIXED_PARTITION:
       case FIXED_PARTITION:
@@ -2999,17 +2906,19 @@
                 get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist);
+                            1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case REFERENCE_PARTITION:
         if (cpi->sf.partition_check ||
             !is_background(cpi, tile, mi_row, mi_col)) {
           nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                               &dummy_rate, &dummy_dist, 1, INT64_MAX);
+                               &dummy_rate, &dummy_dist, 1, INT64_MAX,
+                               x->pc_root);
         } else {
           copy_partitioning(cm, mi_8x8, prev_mi_8x8);
           nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
-                              BLOCK_64X64, 1, &dummy_rate, &dummy_dist);
+                              BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
+                              x->pc_root);
         }
         break;
       default:
@@ -3072,7 +2981,7 @@
     int i;
     struct macroblock_plane *const p = x->plane;
     struct macroblockd_plane *const pd = xd->plane;
-    PICK_MODE_CONTEXT *ctx = &cpi->mb.sb64_context;
+    PICK_MODE_CONTEXT *ctx = &x->pc_root->none;
 
     for (i = 0; i < MAX_MB_PLANE; ++i) {
       p[i].coeff = ctx->coeff_pbuf[i][0];
@@ -3338,14 +3247,14 @@
 }
 
 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize) {
+                              int mi_row, int mi_col, BLOCK_SIZE bsize,
+                              PICK_MODE_CONTEXT *ctx) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO **mi_8x8 = xd->mi;
   MODE_INFO *mi = mi_8x8[0];
   MB_MODE_INFO *mbmi = &mi->mbmi;
-  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
   unsigned int segment_id = mbmi->segment_id;
   const int mis = cm->mi_stride;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index d26b464..b155fd3 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -474,7 +474,7 @@
   TileInfo tile;
   struct macroblock_plane *const p = x->plane;
   struct macroblockd_plane *const pd = xd->plane;
-  const PICK_MODE_CONTEXT *ctx = &x->sb64_context;
+  const PICK_MODE_CONTEXT *ctx = &x->pc_root->none;
   int i;
 
   int recon_yoffset, recon_uvoffset;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index c609a27..0d6b625 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -31,6 +31,7 @@
 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
 #include "vp9/encoder/vp9_aq_variance.h"
 #include "vp9/encoder/vp9_bitstream.h"
+#include "vp9/encoder/vp9_context_tree.h"
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_firstpass.h"
@@ -179,6 +180,8 @@
   vpx_free(cpi->tok);
   cpi->tok = 0;
 
+  vp9_free_pc_tree(&cpi->mb);
+
   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
     vpx_free(lc->rc_twopass_stats_in.buf);
@@ -563,6 +566,8 @@
 
     CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
   }
+
+  vp9_setup_pc_tree(&cpi->common, &cpi->mb);
 }
 
 
@@ -880,124 +885,6 @@
   } while (++i <= MV_MAX);
 }
 
-static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
-                               PICK_MODE_CONTEXT *ctx) {
-  int num_pix = num_4x4_blk << 4;
-  int i, k;
-  ctx->num_4x4_blk = num_4x4_blk;
-
-  CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
-                  vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
-  for (i = 0; i < MAX_MB_PLANE; ++i) {
-    for (k = 0; k < 3; ++k) {
-      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
-      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
-      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
-      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
-                      vpx_memalign(16, num_pix * sizeof(uint16_t)));
-      ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
-      ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
-      ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
-      ctx->eobs_pbuf[i][k]    = ctx->eobs[i][k];
-    }
-  }
-}
-
-static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
-  int i, k;
-  vpx_free(ctx->zcoeff_blk);
-  ctx->zcoeff_blk = 0;
-  for (i = 0; i < MAX_MB_PLANE; ++i) {
-    for (k = 0; k < 3; ++k) {
-      vpx_free(ctx->coeff[i][k]);
-      ctx->coeff[i][k] = 0;
-      vpx_free(ctx->qcoeff[i][k]);
-      ctx->qcoeff[i][k] = 0;
-      vpx_free(ctx->dqcoeff[i][k]);
-      ctx->dqcoeff[i][k] = 0;
-      vpx_free(ctx->eobs[i][k]);
-      ctx->eobs[i][k] = 0;
-    }
-  }
-}
-
-static void init_pick_mode_context(VP9_COMP *cpi) {
-  int i;
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x  = &cpi->mb;
-
-  for (i = 0; i < BLOCK_SIZES; ++i) {
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
-    const int num_4x4_h = num_4x4_blocks_high_lookup[i];
-    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
-    if (i < BLOCK_16X16) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
-          for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
-            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-            alloc_mode_context(cm, num_4x4_blk, ctx);
-          }
-        }
-      }
-    } else if (i < BLOCK_32X32) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
-          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-          ctx->num_4x4_blk = num_4x4_blk;
-          alloc_mode_context(cm, num_4x4_blk, ctx);
-        }
-      }
-    } else if (i < BLOCK_64X64) {
-      for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
-        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-        ctx->num_4x4_blk = num_4x4_blk;
-        alloc_mode_context(cm, num_4x4_blk, ctx);
-      }
-    } else {
-      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-      ctx->num_4x4_blk = num_4x4_blk;
-      alloc_mode_context(cm, num_4x4_blk, ctx);
-    }
-  }
-}
-
-static void free_pick_mode_context(MACROBLOCK *x) {
-  int i;
-
-  for (i = 0; i < BLOCK_SIZES; ++i) {
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
-    const int num_4x4_h = num_4x4_blocks_high_lookup[i];
-    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
-    if (i < BLOCK_16X16) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
-          for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
-            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-            free_mode_context(ctx);
-          }
-        }
-      }
-    } else if (i < BLOCK_32X32) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
-          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-          free_mode_context(ctx);
-        }
-      }
-    } else if (i < BLOCK_64X64) {
-      for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
-        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-        free_mode_context(ctx);
-      }
-    } else {
-      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-      free_mode_context(ctx);
-    }
-  }
-}
 
 VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
   int i, j;
@@ -1026,7 +913,6 @@
 
   init_config(cpi, oxcf);
   vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc);
-  init_pick_mode_context(cpi);
 
   cm->current_video_frame = 0;
 
@@ -1418,7 +1304,6 @@
 #endif
   }
 
-  free_pick_mode_context(&cpi->mb);
   dealloc_compressor_data(cpi);
   vpx_free(cpi->mb.ss);
   vpx_free(cpi->tok);
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 24b8d9d..75b0e9e 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -18,6 +18,8 @@
 VP9_CX_SRCS-yes += vp9_cx_iface.c
 
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
+VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
+VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
 VP9_CX_SRCS-yes += encoder/vp9_cost.h
 VP9_CX_SRCS-yes += encoder/vp9_cost.c
 VP9_CX_SRCS-yes += encoder/vp9_dct.c