diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 230116f818d93ead39ceb622b15b0ac3820d54cd..cad1241259bf9fd22053da6652c1df423baf7c3a 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -79,6 +79,18 @@ struct macroblock_plane {
   // Zbin Over Quant value
   int16_t zbin_extra;
 };
+typedef struct PC_TREE {  // one node of the partition context tree
+  int index;  // NOTE(review): purpose not visible here; confirm
+  PARTITION_TYPE partitioning;      // partition type chosen for this node
+  BLOCK_SIZE block_size;            // square block size of this node
+  PICK_MODE_CONTEXT none;           // context for PARTITION_NONE
+  PICK_MODE_CONTEXT horizontal[2];  // the two PARTITION_HORZ halves
+  PICK_MODE_CONTEXT vertical[2];    // the two PARTITION_VERT halves
+  union {
+    struct PC_TREE *split[4];          // child nodes (nodes above 8x8)
+    PICK_MODE_CONTEXT *leaf_split[4];  // shared context (8x8 nodes)
+  };
+} PC_TREE;
 
 /* The [2] dimension is for whether we skip the EOB node (i.e. if previous
  * coefficient in this block was zero) or not. */
@@ -133,11 +145,6 @@ struct macroblock {
   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
 
-  unsigned char sb_index;   // index of 32x32 block inside the 64x64 block
-  unsigned char mb_index;   // index of 16x16 block inside the 32x32 block
-  unsigned char b_index;    // index of 8x8 block inside the 16x16 block
-  unsigned char ab_index;   // index of 4x4 block inside the 8x8 block
-
   // These define limits to motion vector components to prevent them
   // from extending outside the UMV borders
   int mv_col_min;
@@ -164,69 +171,14 @@ struct macroblock {
   // Used to store sub partition's choices.
   int_mv pred_mv[MAX_REF_FRAMES];
 
-  // TODO(jingning): Need to refactor the structure arrays that buffers the
-  // coding mode decisions of each partition type.
-  PICK_MODE_CONTEXT ab4x4_context[4][4][4];
-  PICK_MODE_CONTEXT sb8x4_context[4][4][4];
-  PICK_MODE_CONTEXT sb4x8_context[4][4][4];
-  PICK_MODE_CONTEXT sb8x8_context[4][4][4];
-  PICK_MODE_CONTEXT sb8x16_context[4][4][2];
-  PICK_MODE_CONTEXT sb16x8_context[4][4][2];
-  PICK_MODE_CONTEXT mb_context[4][4];
-  PICK_MODE_CONTEXT sb32x16_context[4][2];
-  PICK_MODE_CONTEXT sb16x32_context[4][2];
-  // when 4 MBs share coding parameters:
-  PICK_MODE_CONTEXT sb32_context[4];
-  PICK_MODE_CONTEXT sb32x64_context[2];
-  PICK_MODE_CONTEXT sb64x32_context[2];
-  PICK_MODE_CONTEXT sb64_context;
+  PICK_MODE_CONTEXT *leaf_tree;
+  PC_TREE *pc_tree;
+  PC_TREE *pc_root;
   int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
 
-  BLOCK_SIZE b_partitioning[4][4][4];
-  BLOCK_SIZE mb_partitioning[4][4];
-  BLOCK_SIZE sb_partitioning[4];
-  BLOCK_SIZE sb64_partitioning;
-
   void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
 };
 
-// TODO(jingning): the variables used here are little complicated. need further
-// refactoring on organizing the temporary buffers, when recursive
-// partition down to 4x4 block size is enabled.
-static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
-                                                   BLOCK_SIZE bsize) {
-  switch (bsize) {
-    case BLOCK_64X64:
-      return &x->sb64_context;
-    case BLOCK_64X32:
-      return &x->sb64x32_context[x->sb_index];
-    case BLOCK_32X64:
-      return &x->sb32x64_context[x->sb_index];
-    case BLOCK_32X32:
-      return &x->sb32_context[x->sb_index];
-    case BLOCK_32X16:
-      return &x->sb32x16_context[x->sb_index][x->mb_index];
-    case BLOCK_16X32:
-      return &x->sb16x32_context[x->sb_index][x->mb_index];
-    case BLOCK_16X16:
-      return &x->mb_context[x->sb_index][x->mb_index];
-    case BLOCK_16X8:
-      return &x->sb16x8_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_8X16:
-      return &x->sb8x16_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_8X8:
-      return &x->sb8x8_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_8X4:
-      return &x->sb8x4_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_4X8:
-      return &x->sb4x8_context[x->sb_index][x->mb_index][x->b_index];
-    case BLOCK_4X4:
-      return &x->ab4x4_context[x->sb_index][x->mb_index][x->b_index];
-    default:
-      assert(0);
-      return NULL;
-  }
-}
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c
new file mode 100644
index 0000000000000000000000000000000000000000..659935c5fd003dddc90e022a441ea4bce1957d6d
--- /dev/null
+++ b/vp9/encoder/vp9_context_tree.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp9/encoder/vp9_context_tree.h"
+
+// Square block sizes indexed by tree level: index 0 is used for the 8x8
+// nodes and index 3 for the 64x64 root.
+static const BLOCK_SIZE square[] = {
+    BLOCK_8X8,
+    BLOCK_16X16,
+    BLOCK_32X32,
+    BLOCK_64X64,
+};
+
+// Allocates the buffers owned by a single mode context.
+//
+// num_4x4_blk is the number of 4x4 blocks the context covers.  It is rounded
+// up to a minimum of 4 and the rounded value is stored in ctx->num_4x4_blk;
+// every buffer is sized from that rounded value so the recorded size never
+// exceeds what was actually allocated.  Allocations go through
+// CHECK_MEM_ERROR(cm, ...).
+static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
+                               PICK_MODE_CONTEXT *ctx) {
+  const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
+  const int num_pix = num_blk << 4;  // 16 entries per 4x4 block
+  int i, k;
+  ctx->num_4x4_blk = num_blk;
+
+  // Size zcoeff_blk from num_blk, not the raw argument: callers pass 1 or 2
+  // for the smallest contexts, which used to leave this buffer shorter than
+  // the ctx->num_4x4_blk entries recorded above.
+  CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
+                  vpx_calloc(num_blk, sizeof(uint8_t)));
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    for (k = 0; k < 3; ++k) {
+      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
+                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
+                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
+                      vpx_memalign(16, num_pix * sizeof(int16_t)));
+      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
+                      vpx_memalign(16, num_pix * sizeof(uint16_t)));
+      ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
+      ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
+      ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
+      ctx->eobs_pbuf[i][k]    = ctx->eobs[i][k];
+    }
+  }
+}
+
+// Releases every buffer allocated by alloc_mode_context() and clears the
+// pointers afterwards.
+static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
+  int i, k;
+  vpx_free(ctx->zcoeff_blk);
+  ctx->zcoeff_blk = NULL;
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    for (k = 0; k < 3; ++k) {
+      vpx_free(ctx->coeff[i][k]);
+      ctx->coeff[i][k] = NULL;
+      vpx_free(ctx->qcoeff[i][k]);
+      ctx->qcoeff[i][k] = NULL;
+      vpx_free(ctx->dqcoeff[i][k]);
+      ctx->dqcoeff[i][k] = NULL;
+      vpx_free(ctx->eobs[i][k]);
+      ctx->eobs[i][k] = NULL;
+    }
+  }
+}
+
+// Releases the none/horizontal/vertical contexts embedded in one tree node.
+static void free_tree_contexts(PC_TREE *this_pc) {
+  free_mode_context(&this_pc->none);
+  free_mode_context(&this_pc->horizontal[0]);
+  free_mode_context(&this_pc->horizontal[1]);
+  free_mode_context(&this_pc->vertical[0]);
+  free_mode_context(&this_pc->vertical[1]);
+}
+
+// Allocates the none/horizontal/vertical contexts embedded in one tree node
+// covering num_4x4_blk 4x4 blocks.  The horizontal and vertical contexts
+// are each requested at half that block count.
+static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *this_pc,
+                                int num_4x4_blk) {
+  alloc_mode_context(cm, num_4x4_blk, &this_pc->none);
+  alloc_mode_context(cm, num_4x4_blk / 2, &this_pc->horizontal[0]);
+  alloc_mode_context(cm, num_4x4_blk / 2, &this_pc->vertical[0]);
+
+  /* TODO(Jbb): for 4x8 and 8x4 these allocated values are not used.
+   * Figure out a better way to do this. */
+  alloc_mode_context(cm, num_4x4_blk / 2, &this_pc->horizontal[1]);
+  alloc_mode_context(cm, num_4x4_blk / 2, &this_pc->vertical[1]);
+}
+
+// Sets up a tree of contexts such that at each square partition level
+// there are contexts for none, horizontal, vertical, and split, along with
+// a block_size value and a selected partitioning which represents the
+// state of our search.
+//
+// The nodes live in one flat array of 64 + 16 + 4 + 1 entries: the 64 8x8
+// nodes come first, then the 16x16 and 32x32 levels, and the 64x64 root is
+// the last entry, which x->pc_root points at.  Any tree already attached
+// to x is released first so that calling this again does not leak the
+// per-context buffers.
+void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) {
+  int i, j;
+  const int leaf_nodes = 64;
+  const int tree_nodes = 64 + 16 + 4 + 1;
+  int pc_tree_index = 0;
+  PC_TREE *this_pc;
+  PICK_MODE_CONTEXT *this_leaf;
+  int square_index = 1;
+  int nodes;
+
+  // Release the old arrays together with the buffers they own; freeing
+  // only the arrays would orphan every nested allocation.
+  vp9_free_pc_tree(x);
+  CHECK_MEM_ERROR(cm, x->leaf_tree, vpx_calloc(leaf_nodes,
+                                               sizeof(PICK_MODE_CONTEXT)));
+  CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(PC_TREE)));
+
+  this_pc = &x->pc_tree[0];
+  this_leaf = &x->leaf_tree[0];
+
+  // 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same
+  // context so we only need to allocate 1 for each 8x8 block.
+  for (i = 0; i < leaf_nodes; ++i)
+    alloc_mode_context(cm, 1, &x->leaf_tree[i]);
+
+  // Set up all the leaf (8x8) nodes in the tree.  The four leaf_split
+  // entries of a node all refer to the same shared context.
+  for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
+    PC_TREE *const tree = &x->pc_tree[pc_tree_index];
+    tree->block_size = square[0];
+    alloc_tree_contexts(cm, tree, 4);
+    tree->leaf_split[0] = this_leaf++;
+    for (j = 1; j < 4; j++)
+      tree->leaf_split[j] = tree->leaf_split[0];
+  }
+
+  // Each remaining node has 4 children; fill each block_size level of the
+  // tree from the leaves up to the root.
+  for (nodes = 16; nodes > 0; nodes >>= 2, ++square_index) {
+    for (i = 0; i < nodes; ++pc_tree_index, ++i) {
+      PC_TREE *const tree = &x->pc_tree[pc_tree_index];
+      alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
+      tree->block_size = square[square_index];
+      for (j = 0; j < 4; j++)
+        tree->split[j] = this_pc++;
+    }
+  }
+  x->pc_root = &x->pc_tree[tree_nodes - 1];
+  x->pc_root[0].none.best_mode_index = 2;
+}
+
+// Releases everything allocated by vp9_setup_pc_tree().  Does nothing for
+// a tree that was never set up or was already freed, provided the
+// leaf_tree and pc_tree pointers are NULL in that case.
+void vp9_free_pc_tree(MACROBLOCK *m) {
+  const int leaf_nodes = 64;
+  const int tree_nodes = 64 + 16 + 4 + 1;
+  int i;
+
+  // Free the buffers of the shared sub-8x8 mode contexts.
+  if (m->leaf_tree != NULL) {
+    for (i = 0; i < leaf_nodes; ++i)
+      free_mode_context(&m->leaf_tree[i]);
+  }
+
+  // Free the buffers of the contexts embedded in every tree node.
+  if (m->pc_tree != NULL) {
+    for (i = 0; i < tree_nodes; ++i)
+      free_tree_contexts(&m->pc_tree[i]);
+  }
+
+  vpx_free(m->pc_tree);
+  m->pc_tree = NULL;
+  m->pc_root = NULL;  // pointed into pc_tree, which no longer exists
+  vpx_free(m->leaf_tree);
+  m->leaf_tree = NULL;
+}
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
new file mode 100644
index 0000000000000000000000000000000000000000..40d7394edd041c324cbd1fd0e5744df6613ac308
--- /dev/null
+++ b/vp9/encoder/vp9_context_tree.h
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_
+#define VP9_ENCODER_VP9_CONTEXT_TREE_H_
+
+#include "vp9/encoder/vp9_onyx_int.h"
+
+void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x);  // allocates x's tree
+void vp9_free_pc_tree(MACROBLOCK *x);  // frees what vp9_setup_pc_tree built
+
+#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 0498a3f11eac33323e63b2342fd8705758afa13d..dea7848b2cd9a3ec562668c4a0807852d18fd74c 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -48,33 +48,9 @@
 #define SPLIT_MV_ZBIN_BOOST  0
 #define INTRA_ZBIN_BOOST     0
 
-static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) {
-  switch (subsize) {
-    case BLOCK_64X64:
-    case BLOCK_64X32:
-    case BLOCK_32X64:
-    case BLOCK_32X32:
-      return &x->sb_index;
-    case BLOCK_32X16:
-    case BLOCK_16X32:
-    case BLOCK_16X16:
-      return &x->mb_index;
-    case BLOCK_16X8:
-    case BLOCK_8X16:
-    case BLOCK_8X8:
-      return &x->b_index;
-    case BLOCK_8X4:
-    case BLOCK_4X8:
-    case BLOCK_4X4:
-      return &x->ab_index;
-    default:
-      assert(0);
-      return NULL;
-  }
-}
-
 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize);
+                              int mi_row, int mi_col, BLOCK_SIZE bsize,
+                              PICK_MODE_CONTEXT *ctx);
 
 // Motion vector component magnitude threshold for defining fast motion.
 #define FAST_MOTION_MV_THRESH 24
@@ -743,7 +719,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
                              int mi_row, int mi_col,
                              int *totalrate, int64_t *totaldist,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                             int64_t best_rd) {
+                             int64_t best_rd, int block) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -760,10 +736,13 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
   // Use the lower precision, but faster, 32x32 fdct for mode selection.
   x->use_lp32x32fdct = 1;
 
+  // TODO(JBB): Most other places in the code put the "is this not the first
+  // block of the 8x8" check in the calling function instead of calling the
+  // function and then checking.  Do that here.
   if (bsize < BLOCK_8X8) {
     // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
     // there is nothing to be done.
-    if (x->ab_index != 0) {
+    if (block != 0) {
       *totalrate = 0;
       *totaldist = 0;
       return;
@@ -886,22 +865,6 @@ static void update_stats(VP9_COMP *cpi) {
   }
 }
 
-static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) {
-  switch (bsize) {
-    case BLOCK_64X64:
-      return &x->sb64_partitioning;
-    case BLOCK_32X32:
-      return &x->sb_partitioning[x->sb_index];
-    case BLOCK_16X16:
-      return &x->mb_partitioning[x->sb_index][x->mb_index];
-    case BLOCK_8X8:
-      return &x->b_partitioning[x->sb_index][x->mb_index][x->b_index];
-    default:
-      assert(0);
-      return NULL;
-  }
-}
-
 static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
                             ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                             ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
@@ -967,19 +930,12 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
 
 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
                      TOKENEXTRA **tp, int mi_row, int mi_col,
-                     int output_enabled, BLOCK_SIZE bsize) {
-  MACROBLOCK *const x = &cpi->mb;
+                     int output_enabled, BLOCK_SIZE bsize,
+                     PICK_MODE_CONTEXT *ctx) {
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index > 0)
-      return;
-  }
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
-  update_state(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize,
-               output_enabled);
-  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
+  update_state(cpi, ctx, mi_row, mi_col, bsize, output_enabled);
+  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
 
   if (output_enabled) {
     update_stats(cpi);
@@ -991,7 +947,8 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
 
 static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
                       TOKENEXTRA **tp, int mi_row, int mi_col,
-                      int output_enabled, BLOCK_SIZE bsize) {
+                      int output_enabled, BLOCK_SIZE bsize,
+                      PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -999,61 +956,58 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
   const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
   int ctx;
   PARTITION_TYPE partition;
-  BLOCK_SIZE subsize;
+  BLOCK_SIZE subsize = bsize;
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
   if (bsize >= BLOCK_8X8) {
     ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-    subsize = *get_sb_partitioning(x, bsize);
+    subsize = get_subsize(bsize, pc_tree->partitioning);
   } else {
     ctx = 0;
     subsize = BLOCK_4X4;
   }
 
   partition = partition_lookup[bsl][subsize];
+  if (output_enabled && bsize != BLOCK_4X4)
+    cm->counts.partition[ctx][partition]++;
 
   switch (partition) {
     case PARTITION_NONE:
-      if (output_enabled && bsize >= BLOCK_8X8)
-        cm->counts.partition[ctx][PARTITION_NONE]++;
-      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+               &pc_tree->none);
       break;
     case PARTITION_VERT:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_VERT]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
-        encode_b(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize);
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+               &pc_tree->vertical[0]);
+      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
+        encode_b(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize,
+                 &pc_tree->vertical[1]);
       }
       break;
     case PARTITION_HORZ:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_HORZ]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
-        encode_b(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize);
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+               &pc_tree->horizontal[0]);
+      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
+        encode_b(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize,
+                 &pc_tree->horizontal[1]);
       }
       break;
     case PARTITION_SPLIT:
-      subsize = get_subsize(bsize, PARTITION_SPLIT);
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_SPLIT]++;
-
-      *get_sb_index(x, subsize) = 0;
-      encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 1;
-      encode_sb(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 2;
-      encode_sb(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 3;
-      encode_sb(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
-                subsize);
+      if (bsize == BLOCK_8X8) {
+        encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                 pc_tree->leaf_split[0]);
+      } else {
+        encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  pc_tree->split[0]);
+        encode_sb(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize,
+                  pc_tree->split[1]);
+        encode_sb(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize,
+                  pc_tree->split[2]);
+        encode_sb(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+                  subsize, pc_tree->split[3]);
+      }
       break;
     default:
       assert("Invalid partition type.");
@@ -1404,20 +1358,14 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
 
 static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
                         TOKENEXTRA **tp, int mi_row, int mi_col,
-                        int output_enabled, BLOCK_SIZE bsize) {
-  MACROBLOCK *const x = &cpi->mb;
+                     int output_enabled, BLOCK_SIZE bsize,
+                     PICK_MODE_CONTEXT *ctx) {
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index > 0)
-      return;
-  }
 
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
-  update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize);
+  update_state_rt(cpi, ctx, mi_row, mi_col, bsize);
 
-  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
+  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
   update_stats(cpi);
 
   (*tp)->token = EOSB_TOKEN;
@@ -1426,7 +1374,8 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
 
 static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
                          TOKENEXTRA **tp, int mi_row, int mi_col,
-                         int output_enabled, BLOCK_SIZE bsize) {
+                         int output_enabled, BLOCK_SIZE bsize,
+                         PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1451,51 +1400,40 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
   }
 
   partition = partition_lookup[bsl][subsize];
+  if (output_enabled && bsize != BLOCK_4X4)
+    cm->counts.partition[ctx][partition]++;
 
   switch (partition) {
     case PARTITION_NONE:
-      if (output_enabled && bsize >= BLOCK_8X8)
-        cm->counts.partition[ctx][PARTITION_NONE]++;
-      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  &pc_tree->none);
       break;
     case PARTITION_VERT:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_VERT]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  &pc_tree->vertical[0]);
+      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
         encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
-                    subsize);
+                    subsize, &pc_tree->vertical[1]);
       }
       break;
     case PARTITION_HORZ:
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_HORZ]++;
-      *get_sb_index(x, subsize) = 0;
-      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
+      encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                  &pc_tree->horizontal[0]);
+      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
         encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
-                    subsize);
+                    subsize, &pc_tree->horizontal[1]);
       }
       break;
     case PARTITION_SPLIT:
       subsize = get_subsize(bsize, PARTITION_SPLIT);
-      if (output_enabled)
-        cm->counts.partition[ctx][PARTITION_SPLIT]++;
-
-      *get_sb_index(x, subsize) = 0;
-      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
-      *get_sb_index(x, subsize) = 1;
+      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize,
+                   pc_tree->split[0]);
       encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
-                   subsize);
-      *get_sb_index(x, subsize) = 2;
+                   subsize, pc_tree->split[1]);
       encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
-                   subsize);
-      *get_sb_index(x, subsize) = 3;
+                   subsize, pc_tree->split[2]);
       encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
-                   subsize);
+                   subsize, pc_tree->split[3]);
       break;
     default:
       assert("Invalid partition type.");
@@ -1510,7 +1448,8 @@ static void rd_use_partition(VP9_COMP *cpi,
                              MODE_INFO **mi_8x8,
                              TOKENEXTRA **tp, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, int *rate, int64_t *dist,
-                             int do_recon) {
+                             int do_recon, PC_TREE *pc_tree,
+                             int block) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1536,6 +1475,7 @@ static void rd_use_partition(VP9_COMP *cpi,
   int splits_below = 0;
   BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
   int do_partition_search = 1;
+  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
@@ -1546,17 +1486,7 @@ static void rd_use_partition(VP9_COMP *cpi,
   partition = partition_lookup[bsl][bs_type];
   subsize = get_subsize(bsize, partition);
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index != 0) {
-      *rate = 0;
-      *dist = 0;
-      return;
-    }
-  } else {
-    *(get_sb_partitioning(x, bsize)) = subsize;
-  }
+  pc_tree->partitioning = partition;
   save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 
   if (bsize == BLOCK_16X16) {
@@ -1570,7 +1500,7 @@ static void rd_use_partition(VP9_COMP *cpi,
     do_partition_search = 0;
     if (mi_row + (mi_step >> 1) < cm->mi_rows &&
         mi_col + (mi_step >> 1) < cm->mi_cols) {
-      *(get_sb_partitioning(x, bsize)) = bsize;
+      pc_tree->partitioning = PARTITION_NONE;
       bs_type = mi_8x8[0]->mbmi.sb_type = bsize;
       subsize = bsize;
       partition = PARTITION_NONE;
@@ -1597,9 +1527,9 @@ static void rd_use_partition(VP9_COMP *cpi,
     if (partition != PARTITION_NONE && !splits_below &&
         mi_row + (mi_step >> 1) < cm->mi_rows &&
         mi_col + (mi_step >> 1) < cm->mi_cols) {
-      *(get_sb_partitioning(x, bsize)) = bsize;
+      pc_tree->partitioning = PARTITION_NONE;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
-                       get_block_context(x, bsize), INT64_MAX);
+                       ctx, INT64_MAX, 0);
 
       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
 
@@ -1610,31 +1540,28 @@ static void rd_use_partition(VP9_COMP *cpi,
 
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
       mi_8x8[0]->mbmi.sb_type = bs_type;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = partition;
     }
   }
 
   switch (partition) {
     case PARTITION_NONE:
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
-                       &last_part_dist, bsize,
-                       get_block_context(x, bsize), INT64_MAX);
+                       &last_part_dist, bsize, ctx, INT64_MAX, 0);
       break;
     case PARTITION_HORZ:
-      *get_sb_index(x, subsize) = 0;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
-                       &last_part_dist, subsize,
-                       get_block_context(x, subsize), INT64_MAX);
+                       &last_part_dist, subsize, &pc_tree->horizontal[0],
+                       INT64_MAX, 0);
       if (last_part_rate != INT_MAX &&
           bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
         int rt = 0;
         int64_t dt = 0;
-        update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                     subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-        *get_sb_index(x, subsize) = 1;
+        PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+        update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
+        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
         rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt,
-                         subsize, get_block_context(x, subsize), INT64_MAX);
+                         subsize, &pc_tree->horizontal[1], INT64_MAX, 1);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
           last_part_dist = INT64_MAX;
@@ -1646,20 +1573,19 @@ static void rd_use_partition(VP9_COMP *cpi,
       }
       break;
     case PARTITION_VERT:
-      *get_sb_index(x, subsize) = 0;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
-                       &last_part_dist, subsize,
-                       get_block_context(x, subsize), INT64_MAX);
+                       &last_part_dist, subsize, &pc_tree->vertical[0],
+                       INT64_MAX, 0);
       if (last_part_rate != INT_MAX &&
           bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
         int rt = 0;
         int64_t dt = 0;
-        update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                     subsize, 0);
-        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-        *get_sb_index(x, subsize) = 1;
+        PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+        update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
+        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
         rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt,
-                         subsize, get_block_context(x, subsize), INT64_MAX);
+                         subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
+                         INT64_MAX, 1);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
           last_part_dist = INT64_MAX;
@@ -1670,7 +1596,12 @@ static void rd_use_partition(VP9_COMP *cpi,
       }
       break;
     case PARTITION_SPLIT:
-      // Split partition.
+      if (bsize == BLOCK_8X8) {
+        rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
+                         &last_part_dist, subsize, pc_tree->leaf_split[0],
+                         INT64_MAX, 0);
+        break;
+      }
       last_part_rate = 0;
       last_part_dist = 0;
       for (i = 0; i < 4; i++) {
@@ -1683,11 +1614,9 @@ static void rd_use_partition(VP9_COMP *cpi,
         if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
           continue;
 
-        *get_sb_index(x, subsize) = i;
-
         rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
                          mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
-                         i != 3);
+                         i != 3, pc_tree->split[i], i);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
           last_part_dist = INT64_MAX;
@@ -1719,6 +1648,7 @@ static void rd_use_partition(VP9_COMP *cpi,
     chosen_rate = 0;
     chosen_dist = 0;
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+    pc_tree->partitioning = PARTITION_SPLIT;
 
     // Split partition.
     for (i = 0; i < 4; i++) {
@@ -1732,15 +1662,11 @@ static void rd_use_partition(VP9_COMP *cpi,
       if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
         continue;
 
-      *get_sb_index(x, split_subsize) = i;
-      *get_sb_partitioning(x, bsize) = split_subsize;
-      *get_sb_partitioning(x, split_subsize) = split_subsize;
-
       save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
-
+      pc_tree->split[i]->partitioning = PARTITION_NONE;
       rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
-                       split_subsize, get_block_context(x, split_subsize),
-                       INT64_MAX);
+                       split_subsize, &pc_tree->split[i]->none,
+                       INT64_MAX, i);
 
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 
@@ -1755,7 +1681,7 @@ static void rd_use_partition(VP9_COMP *cpi,
 
       if (i != 3)
         encode_sb(cpi, tile, tp,  mi_row + y_idx, mi_col + x_idx, 0,
-                  split_subsize);
+                  split_subsize, pc_tree->split[i]);
 
       pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                    split_subsize);
@@ -1768,19 +1694,19 @@ static void rd_use_partition(VP9_COMP *cpi,
     }
   }
 
-  // If last_part is better set the partitioning to that...
+  // If last_part is better set the partitioning to that.
   if (last_part_rd < chosen_rd) {
     mi_8x8[0]->mbmi.sb_type = bsize;
     if (bsize >= BLOCK_8X8)
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = partition;
     chosen_rate = last_part_rate;
     chosen_dist = last_part_dist;
     chosen_rd = last_part_rd;
   }
-  // If none was better set the partitioning to that...
+  // If none was better set the partitioning to that.
   if (none_rd < chosen_rd) {
     if (bsize >= BLOCK_8X8)
-      *(get_sb_partitioning(x, bsize)) = bsize;
+      pc_tree->partitioning = PARTITION_NONE;
     chosen_rate = none_rate;
     chosen_dist = none_dist;
   }
@@ -1806,8 +1732,8 @@ static void rd_use_partition(VP9_COMP *cpi,
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               chosen_rate, chosen_dist);
-
-    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
+    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize,
+              pc_tree);
   }
 
   *rate = chosen_rate;
@@ -1951,7 +1877,8 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
 static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                               TOKENEXTRA **tp, int mi_row,
                               int mi_col, BLOCK_SIZE bsize, int *rate,
-                              int64_t *dist, int do_recon, int64_t best_rd) {
+                              int64_t *dist, int do_recon, int64_t best_rd,
+                              PC_TREE *pc_tree, int block) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1959,7 +1886,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   PARTITION_CONTEXT sl[8], sa[8];
   TOKENEXTRA *tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
+  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   int i, pl;
   BLOCK_SIZE subsize;
   int this_rate, sum_rate = 0, best_rate = INT_MAX;
@@ -1980,15 +1907,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                                bsize >= BLOCK_8X8;
   (void) *tp_orig;
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index != 0) {
-      *rate = 0;
-      *dist = 0;
-      return;
-    }
-  }
   assert(num_8x8_blocks_wide_lookup[bsize] ==
              num_8x8_blocks_high_lookup[bsize]);
 
@@ -1998,7 +1916,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   } else {
     x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
   }
-
   // Determine partition types in search according to the speed features.
   // The threshold set here has to be of square block size.
   if (cpi->sf.auto_min_max_partition_size) {
@@ -2035,7 +1952,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   // PARTITION_NONE
   if (partition_none_allowed) {
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
-                     ctx, best_rd);
+                     ctx, best_rd, 0);
     if (this_rate != INT_MAX) {
       if (bsize >= BLOCK_8X8) {
         pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -2050,7 +1967,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         best_dist = this_dist;
         best_rd = sum_rd;
         if (bsize >= BLOCK_8X8)
-          *(get_sb_partitioning(x, bsize)) = bsize;
+          pc_tree->partitioning = PARTITION_NONE;
 
         // Adjust threshold according to partition size.
         stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
@@ -2082,29 +1999,47 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   // the starting point of motion search in the following partition type check.
   if (do_split) {
     subsize = get_subsize(bsize, PARTITION_SPLIT);
-    for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
-      const int x_idx = (i & 1) * mi_step;
-      const int y_idx = (i >> 1) * mi_step;
-
-      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
-        continue;
-
-      *get_sb_index(x, subsize) = i;
-      if (cpi->sf.adaptive_motion_search)
-        load_pred_mv(x, ctx);
-      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
-          partition_none_allowed)
-        get_block_context(x, subsize)->pred_interp_filter =
+    if (bsize == BLOCK_8X8) {
+      i = 4;
+      if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
+        pc_tree->leaf_split[0]->pred_interp_filter =
             ctx->mic.mbmi.interp_filter;
-      rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize,
-                        &this_rate, &this_dist, i != 3, best_rd - sum_rd);
 
-      if (this_rate == INT_MAX) {
+      rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+                       pc_tree->leaf_split[0], best_rd, 0);
+
+      if (sum_rate == INT_MAX) {
         sum_rd = INT64_MAX;
       } else {
-        sum_rate += this_rate;
-        sum_dist += this_dist;
         sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+        if (sum_rd < best_rd) {
+          update_state(cpi, ctx, mi_row, mi_col, bsize, 0);
+          encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
+          update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+        }
+      }
+    } else {
+      for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+      const int x_idx = (i & 1) * mi_step;
+      const int y_idx = (i >> 1) * mi_step;
+
+        if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
+          continue;
+
+        if (cpi->sf.adaptive_motion_search)
+          load_pred_mv(x, ctx);
+
+        rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
+                          subsize, &this_rate, &this_dist, i != 3,
+                          best_rd - sum_rd, pc_tree->split[i], i);
+
+        if (this_rate == INT_MAX) {
+          sum_rd = INT64_MAX;
+        } else {
+          sum_rate += this_rate;
+          sum_dist += this_dist;
+          sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+        }
       }
     }
     if (sum_rd < best_rd && i == 4) {
@@ -2115,7 +2050,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         best_rate = sum_rate;
         best_dist = sum_dist;
         best_rd = sum_rd;
-        *(get_sb_partitioning(x, bsize)) = subsize;
+        pc_tree->partitioning = PARTITION_SPLIT;
       }
     } else {
       // skip rectangular partition test when larger block size
@@ -2125,36 +2060,33 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
     }
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
-
   // PARTITION_HORZ
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      get_block_context(x, subsize)->pred_interp_filter =
+      pc_tree->horizontal[0].pred_interp_filter =
           ctx->mic.mbmi.interp_filter;
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
-                     get_block_context(x, subsize), best_rd);
+                     &pc_tree->horizontal[0], best_rd, 0);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
 
     if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) {
-      update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                   subsize, 0);
-      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+      update_state(cpi, &pc_tree->horizontal[0], mi_row, mi_col, subsize, 0);
+      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize,
+                        &pc_tree->horizontal[0]);
 
-      *get_sb_index(x, subsize) = 1;
       if (cpi->sf.adaptive_motion_search)
         load_pred_mv(x, ctx);
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        get_block_context(x, subsize)->pred_interp_filter =
+        pc_tree->horizontal[1].pred_interp_filter =
             ctx->mic.mbmi.interp_filter;
       rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate,
-                       &this_dist, subsize, get_block_context(x, subsize),
-                       best_rd - sum_rd);
+                       &this_dist, subsize, &pc_tree->horizontal[1],
+                       best_rd - sum_rd, 1);
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
       } else {
@@ -2171,41 +2103,39 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         best_rd = sum_rd;
         best_rate = sum_rate;
         best_dist = sum_dist;
-        *(get_sb_partitioning(x, bsize)) = subsize;
+        pc_tree->partitioning = PARTITION_HORZ;
       }
     }
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
-
   // PARTITION_VERT
   if (partition_vert_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_VERT);
 
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      get_block_context(x, subsize)->pred_interp_filter =
+      pc_tree->vertical[0].pred_interp_filter =
           ctx->mic.mbmi.interp_filter;
     rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
-                     get_block_context(x, subsize), best_rd);
+                     &pc_tree->vertical[0], best_rd, 0);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
     if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) {
-      update_state(cpi, get_block_context(x, subsize), mi_row, mi_col,
-                   subsize, 0);
-      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+      update_state(cpi, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
+      encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize,
+                        &pc_tree->vertical[0]);
 
-      *get_sb_index(x, subsize) = 1;
       if (cpi->sf.adaptive_motion_search)
         load_pred_mv(x, ctx);
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        get_block_context(x, subsize)->pred_interp_filter =
+        pc_tree->vertical[1].pred_interp_filter =
             ctx->mic.mbmi.interp_filter;
       rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate,
-                       &this_dist, subsize, get_block_context(x, subsize),
-                       best_rd - sum_rd);
+                       &this_dist, subsize,
+                       &pc_tree->vertical[1], best_rd - sum_rd,
+                       1);
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
       } else {
@@ -2222,12 +2152,11 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         best_rate = sum_rate;
         best_dist = sum_dist;
         best_rd = sum_rd;
-        *(get_sb_partitioning(x, bsize)) = subsize;
+        pc_tree->partitioning = PARTITION_VERT;
       }
     }
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
-
   // TODO(jbb): This code added so that we avoid static analysis
   // warning related to the fact that best_rd isn't used after this
   // point.  This code should be refactored so that the duplicate
@@ -2246,12 +2175,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
                                     best_rate);
     }
-
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               best_rate, best_dist);
+    if (bsize == BLOCK_4X4)
+      encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, ctx);
+    else
+      encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
 
-    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
   }
   if (bsize == BLOCK_64X64) {
     assert(tp_orig < *tp);
@@ -2279,18 +2210,18 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     int dummy_rate;
     int64_t dummy_dist;
 
-    BLOCK_SIZE i;
+    int i;
     MACROBLOCK *x = &cpi->mb;
 
     if (sf->adaptive_pred_interp_filter) {
-      for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) {
-        const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
-        const int num_4x4_h = num_4x4_blocks_high_lookup[i];
-        const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
-        for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index)
-          for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index)
-            for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index)
-              get_block_context(x, i)->pred_interp_filter = SWITCHABLE;
+      for (i = 0; i < 64; ++i)
+        x->leaf_tree[i].pred_interp_filter = SWITCHABLE;
+
+      for (i = 0; i < 64; ++i) {
+        x->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
+        x->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
+        x->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
+        x->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
       }
     }
 
@@ -2310,18 +2241,18 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
                                sf->always_this_block_size);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1);
+                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
       } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
         BLOCK_SIZE bsize;
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1);
+                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
       } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
         choose_partitioning(cpi, tile, mi_row, mi_col);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1);
+                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
       } else {
         if ((cm->current_video_frame
             % sf->last_partitioning_redo_frequency) == 0
@@ -2340,7 +2271,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                     &sf->max_partition_size);
           }
           rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                            &dummy_rate, &dummy_dist, 1, INT64_MAX);
+                            &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root,
+                            0);
         } else {
           if (sf->constrain_copy_partition &&
               sb_has_motion(cm, prev_mi_8x8))
@@ -2349,7 +2281,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
           else
             copy_partitioning(cm, mi_8x8, prev_mi_8x8);
           rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                           &dummy_rate, &dummy_dist, 1);
+                           &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
         }
       }
     } else {
@@ -2361,7 +2293,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                 &sf->max_partition_size);
       }
       rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                        &dummy_rate, &dummy_dist, 1, INT64_MAX);
+                        &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root, 0);
     }
   }
 }
@@ -2523,10 +2455,11 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
 
 static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
                               int mi_row, int mi_col,
-                              BLOCK_SIZE bsize, BLOCK_SIZE subsize) {
+                              BLOCK_SIZE bsize, BLOCK_SIZE subsize,
+                              PC_TREE *pc_tree) {
   MACROBLOCKD *xd = &x->e_mbd;
   int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
-  PARTITION_TYPE partition = partition_lookup[bsl][subsize];
+  PARTITION_TYPE partition = pc_tree->partitioning;
 
   assert(bsize >= BLOCK_8X8);
 
@@ -2536,48 +2469,42 @@ static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
   switch (partition) {
     case PARTITION_NONE:
       set_modeinfo_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+      *(xd->mi[0]) = pc_tree->none.mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
       break;
     case PARTITION_VERT:
-      *get_sb_index(x, subsize) = 0;
       set_modeinfo_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+      *(xd->mi[0]) = pc_tree->vertical[0].mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
 
       if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
         set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs);
-        *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+        *(xd->mi[0]) = pc_tree->vertical[1].mic;
         duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize);
       }
       break;
     case PARTITION_HORZ:
-      *get_sb_index(x, subsize) = 0;
       set_modeinfo_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+      *(xd->mi[0]) = pc_tree->horizontal[0].mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
       if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
         set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col);
-        *(xd->mi[0]) = get_block_context(x, subsize)->mic;
+        *(xd->mi[0]) = pc_tree->horizontal[1].mic;
         duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize);
       }
       break;
-    case PARTITION_SPLIT:
-      *get_sb_index(x, subsize) = 0;
+    case PARTITION_SPLIT: {
+      BLOCK_SIZE subsubsize = get_subsize(subsize, PARTITION_SPLIT);
       fill_mode_info_sb(cm, x, mi_row, mi_col, subsize,
-                        *(get_sb_partitioning(x, subsize)));
-      *get_sb_index(x, subsize) = 1;
+                        subsubsize, pc_tree->split[0]);
       fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
-                        *(get_sb_partitioning(x, subsize)));
-      *get_sb_index(x, subsize) = 2;
+                        subsubsize, pc_tree->split[1]);
       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
-                        *(get_sb_partitioning(x, subsize)));
-      *get_sb_index(x, subsize) = 3;
+                        subsubsize, pc_tree->split[2]);
       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
-                        *(get_sb_partitioning(x, subsize)));
+                        subsubsize, pc_tree->split[3]);
       break;
+    }
     default:
       break;
   }
@@ -2586,15 +2513,16 @@ static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
 static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                                  TOKENEXTRA **tp, int mi_row,
                                  int mi_col, BLOCK_SIZE bsize, int *rate,
-                                 int64_t *dist, int do_recon, int64_t best_rd) {
+                                 int64_t *dist, int do_recon, int64_t best_rd,
+                                 PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
   TOKENEXTRA *tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
+  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   int i;
-  BLOCK_SIZE subsize;
+  BLOCK_SIZE subsize = bsize;
   int this_rate, sum_rate = 0, best_rate = INT_MAX;
   int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
   int64_t sum_rd = 0;
@@ -2613,16 +2541,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                                bsize >= BLOCK_8X8;
   (void) *tp_orig;
 
-  if (bsize < BLOCK_8X8) {
-    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
-    // there is nothing to be done.
-    if (x->ab_index != 0) {
-      *rate = 0;
-      *dist = 0;
-      return;
-    }
-  }
-
   assert(num_8x8_blocks_wide_lookup[bsize] ==
              num_8x8_blocks_high_lookup[bsize]);
 
@@ -2667,7 +2585,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         best_dist = this_dist;
         best_rd = sum_rd;
         if (bsize >= BLOCK_8X8)
-          *(get_sb_partitioning(x, bsize)) = bsize;
+          pc_tree->partitioning = PARTITION_NONE;
 
         // Adjust threshold according to partition size.
         stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
@@ -2704,12 +2622,11 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
         continue;
 
-      *get_sb_index(x, subsize) = i;
       load_pred_mv(x, ctx);
 
       nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
                            subsize, &this_rate, &this_dist, 0,
-                           best_rd - sum_rd);
+                           best_rd - sum_rd, pc_tree->split[i]);
 
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
@@ -2724,7 +2641,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       best_rate = sum_rate;
       best_dist = sum_dist;
       best_rd = sum_rd;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = PARTITION_SPLIT;
     } else {
       // skip rectangular partition test when larger block size
       // gives better rd cost
@@ -2736,26 +2653,22 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   // PARTITION_HORZ
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
                         &this_rate, &this_dist, subsize);
 
-    get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+    pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
 
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
 
     if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
-      *get_sb_index(x, subsize) = 1;
-
       load_pred_mv(x, ctx);
-
       nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col,
                           &this_rate, &this_dist, subsize);
 
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
 
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
@@ -2771,7 +2684,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       best_rd = sum_rd;
       best_rate = sum_rate;
       best_dist = sum_dist;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = PARTITION_HORZ;
     }
   }
 
@@ -2779,24 +2692,18 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   if (partition_vert_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_VERT);
 
-    *get_sb_index(x, subsize) = 0;
     if (cpi->sf.adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
                         &this_rate, &this_dist, subsize);
-    get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+    pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
     if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
-      *get_sb_index(x, subsize) = 1;
-
       load_pred_mv(x, ctx);
-
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms,
                           &this_rate, &this_dist, subsize);
-
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
-
+      pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
       } else {
@@ -2811,9 +2718,13 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       best_rate = sum_rate;
       best_dist = sum_dist;
       best_rd = sum_rd;
-      *(get_sb_partitioning(x, bsize)) = subsize;
+      pc_tree->partitioning = PARTITION_VERT;
     }
   }
+  // TODO(JBB): The following line is here just to avoid a static warning
+  // that occurs because at this point we never again reuse best_rd
+  // despite setting it here.  The code should be refactored to avoid this.
+  (void) best_rd;
 
   *rate = best_rate;
   *dist = best_dist;
@@ -2822,8 +2733,9 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
     return;
 
   // update mode info array
-  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize,
-                    *(get_sb_partitioning(x, bsize)));
+  subsize = get_subsize(bsize, pc_tree->partitioning);
+  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, subsize,
+                    pc_tree);
 
   if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) {
     int output_enabled = (bsize == BLOCK_64X64);
@@ -2840,7 +2752,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               best_rate, best_dist);
 
-    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
+    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
   }
 
   if (bsize == BLOCK_64X64) {
@@ -2858,7 +2770,8 @@ static void nonrd_use_partition(VP9_COMP *cpi,
                                 TOKENEXTRA **tp,
                                 int mi_row, int mi_col,
                                 BLOCK_SIZE bsize, int output_enabled,
-                                int *totrate, int64_t *totdist) {
+                                int *totrate, int64_t *totdist,
+                                PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2878,17 +2791,15 @@ static void nonrd_use_partition(VP9_COMP *cpi,
   switch (partition) {
     case PARTITION_NONE:
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
       break;
     case PARTITION_VERT:
-      *get_sb_index(x, subsize) = 0;
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
       if (mi_col + hbs < cm->mi_cols) {
-        *get_sb_index(x, subsize) = 1;
         nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
                             &rate, &dist, subsize);
-        get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+        pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
           *totrate += rate;
@@ -2897,14 +2808,12 @@ static void nonrd_use_partition(VP9_COMP *cpi,
       }
       break;
     case PARTITION_HORZ:
-      *get_sb_index(x, subsize) = 0;
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
-      get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi;
+      pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
       if (mi_row + hbs < cm->mi_rows) {
-        *get_sb_index(x, subsize) = 1;
         nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
                             &rate, &dist, subsize);
-        get_block_context(x, subsize)->mic.mbmi = mi_8x8[0]->mbmi;
+        pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
           *totrate += rate;
@@ -2914,31 +2823,28 @@ static void nonrd_use_partition(VP9_COMP *cpi,
       break;
     case PARTITION_SPLIT:
       subsize = get_subsize(bsize, PARTITION_SPLIT);
-      *get_sb_index(x, subsize) = 0;
       nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
-                          subsize, output_enabled, totrate, totdist);
-      *get_sb_index(x, subsize) = 1;
+                          subsize, output_enabled, totrate, totdist,
+                          pc_tree->split[0]);
       nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp,
                           mi_row, mi_col + hbs, subsize, output_enabled,
-                          &rate, &dist);
+                          &rate, &dist, pc_tree->split[1]);
       if (rate != INT_MAX && dist != INT64_MAX &&
           *totrate != INT_MAX && *totdist != INT64_MAX) {
         *totrate += rate;
         *totdist += dist;
       }
-      *get_sb_index(x, subsize) = 2;
       nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp,
                           mi_row + hbs, mi_col, subsize, output_enabled,
-                          &rate, &dist);
+                          &rate, &dist, pc_tree->split[2]);
       if (rate != INT_MAX && dist != INT64_MAX &&
           *totrate != INT_MAX && *totdist != INT64_MAX) {
         *totrate += rate;
         *totdist += dist;
       }
-      *get_sb_index(x, subsize) = 3;
       nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp,
                           mi_row + hbs, mi_col + hbs, subsize, output_enabled,
-                          &rate, &dist);
+                          &rate, &dist, pc_tree->split[3]);
       if (rate != INT_MAX && dist != INT64_MAX &&
           *totrate != INT_MAX && *totdist != INT64_MAX) {
         *totrate += rate;
@@ -2953,7 +2859,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               *totrate, *totdist);
-    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize);
+    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize, pc_tree);
   }
 }
 
@@ -2970,6 +2876,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
   // Code each SB in the row
   for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
        mi_col += MI_BLOCK_SIZE) {
+    MACROBLOCK *x = &cpi->mb;
     int dummy_rate = 0;
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
@@ -2985,12 +2892,12 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
       case VAR_BASED_PARTITION:
         choose_partitioning(cpi, tile, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist);
+                            1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case SOURCE_VAR_BASED_PARTITION:
         set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist);
+                            1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case VAR_BASED_FIXED_PARTITION:
       case FIXED_PARTITION:
@@ -2999,17 +2906,19 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                 get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist);
+                            1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case REFERENCE_PARTITION:
         if (cpi->sf.partition_check ||
             !is_background(cpi, tile, mi_row, mi_col)) {
           nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                               &dummy_rate, &dummy_dist, 1, INT64_MAX);
+                               &dummy_rate, &dummy_dist, 1, INT64_MAX,
+                               x->pc_root);
         } else {
           copy_partitioning(cm, mi_8x8, prev_mi_8x8);
           nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
-                              BLOCK_64X64, 1, &dummy_rate, &dummy_dist);
+                              BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
+                              x->pc_root);
         }
         break;
       default:
@@ -3072,7 +2981,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
     int i;
     struct macroblock_plane *const p = x->plane;
     struct macroblockd_plane *const pd = xd->plane;
-    PICK_MODE_CONTEXT *ctx = &cpi->mb.sb64_context;
+    PICK_MODE_CONTEXT *ctx = &x->pc_root->none;
 
     for (i = 0; i < MAX_MB_PLANE; ++i) {
       p[i].coeff = ctx->coeff_pbuf[i][0];
@@ -3338,14 +3247,14 @@ static int get_zbin_mode_boost(const MB_MODE_INFO *mbmi, int enabled) {
 }
 
 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize) {
+                              int mi_row, int mi_col, BLOCK_SIZE bsize,
+                              PICK_MODE_CONTEXT *ctx) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO **mi_8x8 = xd->mi;
   MODE_INFO *mi = mi_8x8[0];
   MB_MODE_INFO *mbmi = &mi->mbmi;
-  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
   unsigned int segment_id = mbmi->segment_id;
   const int mis = cm->mi_stride;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index d26b464b92ffd48a92bef5cdea194dd511681761..b155fd39e314d89886861f344c2d6d770bcf6436 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -474,7 +474,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
   TileInfo tile;
   struct macroblock_plane *const p = x->plane;
   struct macroblockd_plane *const pd = xd->plane;
-  const PICK_MODE_CONTEXT *ctx = &x->sb64_context;
+  const PICK_MODE_CONTEXT *ctx = &x->pc_root->none;
   int i;
 
   int recon_yoffset, recon_uvoffset;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index c609a27e966ce017be79a20e4ec29a569b3cd477..0d6b62509b570fae10eba236a2a44d0dad799352 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -31,6 +31,7 @@
 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
 #include "vp9/encoder/vp9_aq_variance.h"
 #include "vp9/encoder/vp9_bitstream.h"
+#include "vp9/encoder/vp9_context_tree.h"
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_firstpass.h"
@@ -179,6 +180,8 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
   vpx_free(cpi->tok);
   cpi->tok = 0;
 
+  vp9_free_pc_tree(&cpi->mb);
+
   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
     vpx_free(lc->rc_twopass_stats_in.buf);
@@ -563,6 +566,8 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
 
     CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
   }
+
+  vp9_setup_pc_tree(&cpi->common, &cpi->mb);
 }
 
 
@@ -880,124 +885,6 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
   } while (++i <= MV_MAX);
 }
 
-static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
-                               PICK_MODE_CONTEXT *ctx) {
-  int num_pix = num_4x4_blk << 4;
-  int i, k;
-  ctx->num_4x4_blk = num_4x4_blk;
-
-  CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
-                  vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
-  for (i = 0; i < MAX_MB_PLANE; ++i) {
-    for (k = 0; k < 3; ++k) {
-      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
-      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
-      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(int16_t)));
-      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
-                      vpx_memalign(16, num_pix * sizeof(uint16_t)));
-      ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
-      ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
-      ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
-      ctx->eobs_pbuf[i][k]    = ctx->eobs[i][k];
-    }
-  }
-}
-
-static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
-  int i, k;
-  vpx_free(ctx->zcoeff_blk);
-  ctx->zcoeff_blk = 0;
-  for (i = 0; i < MAX_MB_PLANE; ++i) {
-    for (k = 0; k < 3; ++k) {
-      vpx_free(ctx->coeff[i][k]);
-      ctx->coeff[i][k] = 0;
-      vpx_free(ctx->qcoeff[i][k]);
-      ctx->qcoeff[i][k] = 0;
-      vpx_free(ctx->dqcoeff[i][k]);
-      ctx->dqcoeff[i][k] = 0;
-      vpx_free(ctx->eobs[i][k]);
-      ctx->eobs[i][k] = 0;
-    }
-  }
-}
-
-static void init_pick_mode_context(VP9_COMP *cpi) {
-  int i;
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x  = &cpi->mb;
-
-  for (i = 0; i < BLOCK_SIZES; ++i) {
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
-    const int num_4x4_h = num_4x4_blocks_high_lookup[i];
-    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
-    if (i < BLOCK_16X16) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
-          for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
-            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-            alloc_mode_context(cm, num_4x4_blk, ctx);
-          }
-        }
-      }
-    } else if (i < BLOCK_32X32) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
-          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-          ctx->num_4x4_blk = num_4x4_blk;
-          alloc_mode_context(cm, num_4x4_blk, ctx);
-        }
-      }
-    } else if (i < BLOCK_64X64) {
-      for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
-        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-        ctx->num_4x4_blk = num_4x4_blk;
-        alloc_mode_context(cm, num_4x4_blk, ctx);
-      }
-    } else {
-      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-      ctx->num_4x4_blk = num_4x4_blk;
-      alloc_mode_context(cm, num_4x4_blk, ctx);
-    }
-  }
-}
-
-static void free_pick_mode_context(MACROBLOCK *x) {
-  int i;
-
-  for (i = 0; i < BLOCK_SIZES; ++i) {
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
-    const int num_4x4_h = num_4x4_blocks_high_lookup[i];
-    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
-    if (i < BLOCK_16X16) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
-          for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
-            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-            free_mode_context(ctx);
-          }
-        }
-      }
-    } else if (i < BLOCK_32X32) {
-      for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
-        for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
-          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-          free_mode_context(ctx);
-        }
-      }
-    } else if (i < BLOCK_64X64) {
-      for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
-        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-        free_mode_context(ctx);
-      }
-    } else {
-      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
-      free_mode_context(ctx);
-    }
-  }
-}
 
 VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
   int i, j;
@@ -1026,7 +913,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
 
   init_config(cpi, oxcf);
   vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc);
-  init_pick_mode_context(cpi);
 
   cm->current_video_frame = 0;
 
@@ -1418,7 +1304,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
 #endif
   }
 
-  free_pick_mode_context(&cpi->mb);
   dealloc_compressor_data(cpi);
   vpx_free(cpi->mb.ss);
   vpx_free(cpi->tok);
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 24b8d9de15ca79bccd1e9f4051ec9e3d5fe9a358..75b0e9e5852a347948675f4e26151d1de22cc88f 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -18,6 +18,8 @@ VP9_CX_SRCS_REMOVE-no  += $(VP9_COMMON_SRCS_REMOVE-no)
 VP9_CX_SRCS-yes += vp9_cx_iface.c
 
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
+VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
+VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
 VP9_CX_SRCS-yes += encoder/vp9_cost.h
 VP9_CX_SRCS-yes += encoder/vp9_cost.c
 VP9_CX_SRCS-yes += encoder/vp9_dct.c