diff --git a/configure b/configure
index 573ec36a98a4432183db5e5470d54e22910cfb89..372e2594c585a2bb2e4305e9f33ebfb9b7a8d39e 100755
--- a/configure
+++ b/configure
@@ -249,7 +249,6 @@ EXPERIMENT_LIST="
     oneshotq
     multiple_arf
     code_zerogroup
-    sb8x8
     non420
 "
 CONFIG_LIST="
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index a8b563c617cca1ffde99c368d54afaa497036758..97bb33e2ed8e24ef700bc6a10c848e3b6d162c27 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -83,9 +83,6 @@ typedef enum {
   D27_PRED,           /* Directional 22 deg prediction  [anti-clockwise from 0 deg hor] */
   D63_PRED,           /* Directional 67 deg prediction  [anti-clockwise from 0 deg hor] */
   TM_PRED,            /* Truemotion prediction */
-#if !CONFIG_SB8X8
-  I8X8_PRED,          /* 8x8 based prediction, each 8x8 has its own mode */
-#endif
   I4X4_PRED,          /* 4x4 based prediction, each 4x4 has its own mode */
   NEARESTMV,
   NEARMV,
@@ -128,9 +125,6 @@ typedef enum {
 
 #define VP9_YMODES  (I4X4_PRED + 1)
 #define VP9_UV_MODES (TM_PRED + 1)
-#if !CONFIG_SB8X8
-#define VP9_I8X8_MODES (TM_PRED + 1)
-#endif
 #define VP9_I32X32_MODES (TM_PRED + 1)
 
 #define VP9_MVREFS (1 + SPLITMV - NEARESTMV)
@@ -173,16 +167,6 @@ typedef enum {
 #define VP9_NKF_BINTRAMODES (VP9_BINTRAMODES)  /* 10 */
 #endif
 
-#if !CONFIG_SB8X8
-typedef enum {
-  PARTITIONING_16X8 = 0,
-  PARTITIONING_8X16,
-  PARTITIONING_8X8,
-  PARTITIONING_4X4,
-  NB_PARTITIONINGS,
-} SPLITMV_PARTITIONING_TYPE;
-#endif
-
 /* For keyframes, intra block modes are predicted by the (already decoded)
    modes for the Y blocks to the left and above us; for interframes, there
    is a single probability table. */
@@ -209,11 +193,9 @@ typedef enum {
 static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) {
   switch (sb_type) {
     case BLOCK_SIZE_AB4X4: return 0;
-#if CONFIG_SB8X8
     case BLOCK_SIZE_SB8X8:
     case BLOCK_SIZE_SB8X16: return 1;
     case BLOCK_SIZE_SB16X8:
-#endif
     case BLOCK_SIZE_MB16X16:
     case BLOCK_SIZE_SB16X32: return 2;
     case BLOCK_SIZE_SB32X16:
@@ -228,11 +210,9 @@ static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) {
 static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
   switch (sb_type) {
     case BLOCK_SIZE_AB4X4: return 0;
-#if CONFIG_SB8X8
     case BLOCK_SIZE_SB8X8:
     case BLOCK_SIZE_SB16X8: return 1;
     case BLOCK_SIZE_SB8X16:
-#endif
     case BLOCK_SIZE_MB16X16:
     case BLOCK_SIZE_SB32X16: return 2;
     case BLOCK_SIZE_SB16X32:
@@ -245,21 +225,13 @@ static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
 }
 
 static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) {
-#if CONFIG_SB8X8
   int a = b_width_log2(sb_type) - 1;
-#else
-  int a = b_width_log2(sb_type) - 2;
-#endif
   assert(a >= 0);
   return a;
 }
 
 static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
-#if CONFIG_SB8X8
   int a = b_height_log2(sb_type) - 1;
-#else
-  int a = b_height_log2(sb_type) - 2;
-#endif
   assert(a >= 0);
   return a;
 }
@@ -277,9 +249,6 @@ typedef struct {
 
   int mb_mode_context[MAX_REF_FRAMES];
 
-#if !CONFIG_SB8X8
-  SPLITMV_PARTITIONING_TYPE partitioning;
-#endif
   unsigned char mb_skip_coeff;                                /* does this mb have coefficients at all, 1=no coefficients, 0=need to decode tokens */
   unsigned char need_to_clamp_mvs;
   unsigned char need_to_clamp_secondmv;
@@ -301,7 +270,7 @@ typedef struct {
 
 typedef struct {
   MB_MODE_INFO mbmi;
-  union b_mode_info bmi[16 >> (CONFIG_SB8X8 * 2)];
+  union b_mode_info bmi[4];
 } MODE_INFO;
 
 struct scale_factors {
@@ -443,9 +412,7 @@ typedef struct macroblockd {
 
   int sb_index;   // index of 32x32 block inside the 64x64 block
   int mb_index;   // index of 16x16 block inside the 32x32 block
-#if CONFIG_SB8X8
   int b_index;    // index of 8x8 block inside the 16x16 block
-#endif
   int q_index;
 
 } MACROBLOCKD;
@@ -462,11 +429,7 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
   if (bsl == 0)
     return;
 
-#if CONFIG_SB8X8
   bs = 1 << (bsl - 1);
-#else
-  bs = 1 << bsl;
-#endif
 
   // update the partition context at the end nodes. set partition bits
   // of block sizes larger than the current one to be one, and partition
@@ -502,21 +465,13 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd,
   int above = 0, left = 0, i;
   int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
 
-#if CONFIG_SB8X8
   bs = 1 << (bsl - 1);
-#else
-  bs = 1 << bsl;
-#endif
 
   assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
   assert(bsl >= 0);
   assert(boffset >= 0);
 
-#if CONFIG_SB8X8
   bs = 1 << (bsl - 1);
-#else
-  bs = 1 << bsl;
-#endif
 
   for (i = 0; i < bs; i++)
     above |= (xd->above_seg_context[i] & (1 << boffset));
@@ -541,10 +496,8 @@ static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
         subsize = BLOCK_SIZE_SB64X32;
       else if (bsize == BLOCK_SIZE_SB32X32)
         subsize = BLOCK_SIZE_SB32X16;
-#if CONFIG_SB8X8
       else if (bsize == BLOCK_SIZE_MB16X16)
         subsize = BLOCK_SIZE_SB16X8;
-#endif
       else
         assert(0);
       break;
@@ -553,10 +506,8 @@ static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
         subsize = BLOCK_SIZE_SB32X64;
       else if (bsize == BLOCK_SIZE_SB32X32)
         subsize = BLOCK_SIZE_SB16X32;
-#if CONFIG_SB8X8
       else if (bsize == BLOCK_SIZE_MB16X16)
         subsize = BLOCK_SIZE_SB8X16;
-#endif
       else
         assert(0);
       break;
@@ -565,10 +516,8 @@ static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
         subsize = BLOCK_SIZE_SB32X32;
       else if (bsize == BLOCK_SIZE_SB32X32)
         subsize = BLOCK_SIZE_MB16X16;
-#if CONFIG_SB8X8
       else if (bsize == BLOCK_SIZE_MB16X16)
         subsize = BLOCK_SIZE_SB8X8;
-#endif
       else
         assert(0);
       break;
@@ -659,42 +608,6 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
           xd->mode_info_context->bmi[ib].as_mode.context :
 #endif
         xd->mode_info_context->bmi[ib].as_mode.first);
-#if !CONFIG_SB8X8
-  } else if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
-             xd->q_index < ACTIVE_HT) {
-    const int ic = (ib & 10);
-#if USE_ADST_FOR_I8X8_4X4
-#if USE_ADST_PERIPHERY_ONLY
-    // Use ADST for periphery blocks only
-    const int inner = ib & 5;
-    tx_type = txfm_map(pred_mode_conv(
-        (MB_PREDICTION_MODE)xd->mode_info_context->bmi[ic].as_mode.first));
-
-#if USE_ADST_FOR_REMOTE_EDGE
-    if (inner == 5)
-      tx_type = DCT_DCT;
-#else
-    if (inner == 1) {
-      if (tx_type == ADST_ADST) tx_type = ADST_DCT;
-      else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
-    } else if (inner == 4) {
-      if (tx_type == ADST_ADST) tx_type = DCT_ADST;
-      else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
-    } else if (inner == 5) {
-      tx_type = DCT_DCT;
-    }
-#endif
-#else
-    // Use ADST
-    b += ic - ib;
-    tx_type = txfm_map(pred_mode_conv(
-        (MB_PREDICTION_MODE)b->bmi.as_mode.first));
-#endif
-#else
-    // Use 2D DCT
-    tx_type = DCT_DCT;
-#endif
-#endif  // !CONFIG_SB8X8
   } else if (xd->mode_info_context->mbmi.mode <= TM_PRED &&
              xd->q_index < ACTIVE_HT) {
 #if USE_ADST_FOR_I16X16_4X4
@@ -739,15 +652,6 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
 #endif
   if (ib >= (1 << (wb + hb)))  // no chroma adst
     return tx_type;
-#if !CONFIG_SB8X8
-  if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
-      xd->q_index < ACTIVE_HT8) {
-    // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged
-    // or the relationship otherwise modified to address this type conversion.
-    tx_type = txfm_map(pred_mode_conv(
-           (MB_PREDICTION_MODE)xd->mode_info_context->bmi[ib].as_mode.first));
-  } else
-#endif  // CONFIG_SB8X8
   if (xd->mode_info_context->mbmi.mode <= TM_PRED &&
       xd->q_index < ACTIVE_HT8) {
 #if USE_ADST_FOR_I16X16_8X8
@@ -821,9 +725,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *xd);
 static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) {
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
   const TX_SIZE size = mbmi->txfm_size;
-#if !CONFIG_SB8X8
-  const MB_PREDICTION_MODE mode = mbmi->mode;
-#endif  // !CONFIG_SB8X8
 
   switch (mbmi->sb_type) {
     case BLOCK_SIZE_SB64X64:
@@ -835,7 +736,6 @@ static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) {
         return TX_16X16;
       else
         return size;
-#if CONFIG_SB8X8
     case BLOCK_SIZE_SB32X16:
     case BLOCK_SIZE_SB16X32:
     case BLOCK_SIZE_MB16X16:
@@ -845,15 +745,6 @@ static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) {
         return size;
     default:
       return TX_4X4;
-#else  // CONFIG_SB8X8
-    default:
-      if (size == TX_16X16)
-        return TX_8X8;
-      else if (size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV))
-        return TX_4X4;
-      else
-        return size;
-#endif  // CONFIG_SB8X8
   }
 
   return size;
@@ -891,9 +782,6 @@ typedef void (*foreach_transformed_block_visitor)(int plane, int block,
                                                   void *arg);
 static INLINE void foreach_transformed_block_in_plane(
     const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
-#if !CONFIG_SB8X8
-    int is_split,
-#endif  // !CONFIG_SB8X8
     foreach_transformed_block_visitor visit, void *arg) {
   const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
 
@@ -913,9 +801,6 @@ static INLINE void foreach_transformed_block_in_plane(
   const int ss_max = MAX(xd->plane[plane].subsampling_x,
                          xd->plane[plane].subsampling_y);
   const int ss_txfrm_size = txfrm_size_b > ss_block_size
-#if !CONFIG_SB8X8
-                            || is_split
-#endif  // !CONFIG_SB8X8
                                 ? txfrm_size_b - ss_max * 2
                                 : txfrm_size_b;
   const int step = 1 << ss_txfrm_size;
@@ -932,24 +817,10 @@ static INLINE void foreach_transformed_block_in_plane(
 static INLINE void foreach_transformed_block(
     const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
     foreach_transformed_block_visitor visit, void *arg) {
-#if !CONFIG_SB8X8
-  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
-  const int is_split =
-      xd->mode_info_context->mbmi.txfm_size == TX_8X8 &&
-      (mode == I8X8_PRED || mode == SPLITMV);
-#endif  // !CONFIG_SB8X8
   int plane;
 
   for (plane = 0; plane < MAX_MB_PLANE; plane++) {
-#if !CONFIG_SB8X8
-    const int is_split_chroma = is_split &&
-         xd->plane[plane].plane_type == PLANE_TYPE_UV;
-#endif  // !CONFIG_SB8X8
-
     foreach_transformed_block_in_plane(xd, bsize, plane,
-#if !CONFIG_SB8X8
-                                       is_split_chroma,
-#endif  // !CONFIG_SB8X8
                                        visit, arg);
   }
 }
@@ -957,19 +828,10 @@ static INLINE void foreach_transformed_block(
 static INLINE void foreach_transformed_block_uv(
     const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
     foreach_transformed_block_visitor visit, void *arg) {
-#if !CONFIG_SB8X8
-  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
-  const int is_split =
-      xd->mode_info_context->mbmi.txfm_size == TX_8X8 &&
-      (mode == I8X8_PRED || mode == SPLITMV);
-#endif  // !CONFIG_SB8X8
   int plane;
 
   for (plane = 1; plane < MAX_MB_PLANE; plane++) {
     foreach_transformed_block_in_plane(xd, bsize, plane,
-#if !CONFIG_SB8X8
-                                       is_split,
-#endif  // !CONFIG_SB8X8
                                        visit, arg);
   }
 }
@@ -997,16 +859,8 @@ static INLINE void foreach_predicted_block_in_plane(
   int pred_w, pred_h;
 
   if (mode == SPLITMV) {
-#if CONFIG_SB8X8
     pred_w = 0;
     pred_h = 0;
-#else
-    // 4x4 or 8x8
-    const int is_4x4 =
-        (xd->mode_info_context->mbmi.partitioning == PARTITIONING_4X4);
-    pred_w = is_4x4 ? 0 : 1 >> xd->plane[plane].subsampling_x;
-    pred_h = is_4x4 ? 0 : 1 >> xd->plane[plane].subsampling_y;
-#endif
   } else {
     pred_w = bw;
     pred_h = bh;
@@ -1099,13 +953,6 @@ static TX_SIZE tx_size_for_plane(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
     return xd->mode_info_context->mbmi.txfm_size;
   } else {
     const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
-#if !CONFIG_SB8X8
-    const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
-    const int is_split =
-        xd->mode_info_context->mbmi.txfm_size == TX_8X8 &&
-        (mode == I8X8_PRED || mode == SPLITMV);
-#endif
-
     // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
     // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
     const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -1122,9 +969,6 @@ static TX_SIZE tx_size_for_plane(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
     const int ss_max = MAX(xd->plane[plane].subsampling_x,
                            xd->plane[plane].subsampling_y);
     const int ss_txfrm_size = txfrm_size_b > ss_block_size
-#if !CONFIG_SB8X8
-                            || is_split
-#endif  // !CONFIG_SB8X8
                                   ? txfrm_size_b - ss_max * 2
                                   : txfrm_size_b;
     return (TX_SIZE)(ss_txfrm_size / 2);
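
With the experiment folded in, one MODE_INFO (MI) unit always covers 8x8 luma pixels, so mi_width_log2() is simply b_width_log2() minus one, and MODE_INFO.bmi shrinks to four entries (a 2x2 grid of 4x4 blocks). A minimal sketch of the resulting mapping, derived from the switch statements above; the check function is hypothetical and assumes vp9/common/vp9_blockd.h is included:

    #include <assert.h>

    static void check_mi_widths(void) {
      assert(mi_width_log2(BLOCK_SIZE_SB8X8)   == 0);  /* 1 MI unit,  8 px wide */
      assert(mi_width_log2(BLOCK_SIZE_MB16X16) == 1);  /* 2 MI units, 16 px     */
      assert(mi_width_log2(BLOCK_SIZE_SB32X32) == 2);  /* 4 MI units, 32 px     */
      assert(mi_width_log2(BLOCK_SIZE_SB64X64) == 3);  /* 8 MI units, 64 px     */
    }

The height mappings are symmetric via b_height_log2() and mi_height_log2().
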
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index f0a5d97a9bde12741da1b6824eff860adbdd15a0..31c1a52c041129edfc9730cabdedf0ceeed969bf 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -16,7 +16,6 @@
 #include "vpx_mem/vpx_mem.h"
 
 static const unsigned int kf_y_mode_cts[8][VP9_YMODES] = {
-#if CONFIG_SB8X8
   /* DC V   H  D45 135 117 153 D27 D63 TM i4X4 */
   {12,  6,  5,  5,  5,  5,  5,  5,  5,  2, 200},
   {25, 13, 13,  7,  7,  7,  7,  7,  7,  6, 160},
@@ -26,27 +25,11 @@ static const unsigned int kf_y_mode_cts[8][VP9_YMODES] = {
   {68, 33, 35,  8,  8,  8,  8,  8,  8, 17,  68},
   {78, 38, 38,  8,  8,  8,  8,  8,  8, 19,  52},
   {89, 42, 42,  8,  8,  8,  8,  8,  8, 21,  34},
-#else
-  /* DC V   H  D45 135 117 153 D27 D63 TM i8x8 i4X4 */
-  {12,  6,  5,  5,  5,  5,  5,  5,  5,  2, 22, 200},
-  {25, 13, 13,  7,  7,  7,  7,  7,  7,  6, 27, 160},
-  {31, 17, 18,  8,  8,  8,  8,  8,  8,  9, 26, 139},
-  {40, 22, 23,  8,  8,  8,  8,  8,  8, 12, 27, 116},
-  {53, 26, 28,  8,  8,  8,  8,  8,  8, 13, 26,  94},
-  {68, 33, 35,  8,  8,  8,  8,  8,  8, 17, 20,  68},
-  {78, 38, 38,  8,  8,  8,  8,  8,  8, 19, 16,  52},
-  {89, 42, 42,  8,  8,  8,  8,  8,  8, 21, 12,  34},
-#endif
 };
 
 static const unsigned int y_mode_cts  [VP9_YMODES] = {
-#if CONFIG_SB8X8
   /* DC V   H  D45 135 117 153 D27 D63 TM i4X4 */
   98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 70
-#else
-  /* DC V   H  D45 135 117 153 D27 D63 TM i8x8 i4X4 */
-  98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 16, 70
-#endif
 };
 
 static const unsigned int uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
@@ -61,19 +44,9 @@ static const unsigned int uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
   { 150, 15, 10, 10, 10, 10, 10, 75, 10,  6}, /* D27 */
   { 150, 15, 10, 10, 10, 10, 10, 10, 75,  6}, /* D63 */
   { 160, 30, 30, 10, 10, 10, 10, 10, 10, 16}, /* TM */
-#if !CONFIG_SB8X8
-  { 132, 46, 40, 10, 10, 10, 10, 10, 10, 18}, /* i8x8 - never used */
-#endif
   { 150, 35, 41, 10, 10, 10, 10, 10, 10, 10}, /* i4X4 */
 };
 
-#if !CONFIG_SB8X8
-static const unsigned int i8x8_mode_cts  [VP9_I8X8_MODES] = {
-  /* DC V  H D45 135 117 153 D27 D63  TM */
-  73, 49, 61, 30, 30, 30, 30, 30, 30, 13
-};
-#endif
-
 static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
   // DC   V   H  D45 135 117 153 D27 D63 TM
   { 160, 24, 24, 20, 20, 20, 20, 20, 20,  8}, /* DC */
@@ -86,9 +59,6 @@ static const unsigned int kf_uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
   { 102, 33, 20, 20, 20, 20, 20, 64, 20, 14}, /* D27 */
   { 102, 33, 20, 20, 20, 20, 20, 20, 64, 14}, /* D63 */
   { 132, 36, 30, 20, 20, 20, 20, 20, 20, 18}, /* TM */
-#if !CONFIG_SB8X8
-  { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* i8x8 - never used */
-#endif
   { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* I4X4 */
 };
 
@@ -146,45 +116,13 @@ const vp9_prob vp9_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP9_SUBMVREFS - 1] = {
   { 208, 1, 1  }
 };
 
-#if !CONFIG_SB8X8
-vp9_mbsplit vp9_mbsplits [VP9_NUMMBSPLITS] = {
-  {
-    0,  0,  0,  0,
-    0,  0,  0,  0,
-    1,  1,  1,  1,
-    1,  1,  1,  1,
-  }, {
-    0,  0,  1,  1,
-    0,  0,  1,  1,
-    0,  0,  1,  1,
-    0,  0,  1,  1,
-  }, {
-    0,  0,  1,  1,
-    0,  0,  1,  1,
-    2,  2,  3,  3,
-    2,  2,  3,  3,
-  }, {
-    0,  1,  2,  3,
-    4,  5,  6,  7,
-    8,  9,  10, 11,
-    12, 13, 14, 15,
-  },
-};
-
-const int vp9_mbsplit_count [VP9_NUMMBSPLITS] = { 2, 2, 4, 16};
-
-const vp9_prob vp9_mbsplit_probs [VP9_NUMMBSPLITS - 1] = { 110, 111, 150};
-#endif
-
 const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS]
                                   [PARTITION_TYPES - 1] = {
-#if CONFIG_SB8X8
   // FIXME(jingning,rbultje) put real probabilities here
   {202, 162, 107},
   {16,  2,   169},
   {3,   246,  19},
   {104, 90,  134},
-#endif
   {202, 162, 107},
   {16,  2,   169},
   {3,   246,  19},
@@ -260,12 +198,7 @@ const vp9_tree_index vp9_ymode_tree[VP9_YMODES * 2 - 2] = {
   -D27_PRED, -D63_PRED,
   16, 18,
   -V_PRED, -H_PRED,
-#if CONFIG_SB8X8
   -TM_PRED, -I4X4_PRED
-#else
-  -TM_PRED, 20,
-  -I4X4_PRED, -I8X8_PRED
-#endif
 };
 
 const vp9_tree_index vp9_kf_ymode_tree[VP9_YMODES * 2 - 2] = {
@@ -278,28 +211,9 @@ const vp9_tree_index vp9_kf_ymode_tree[VP9_YMODES * 2 - 2] = {
   -D27_PRED, -D63_PRED,
   16, 18,
   -V_PRED, -H_PRED,
-#if CONFIG_SB8X8
   -TM_PRED, -I4X4_PRED
-#else
-  -TM_PRED, 20,
-  -I4X4_PRED, -I8X8_PRED
-#endif
 };
 
-#if !CONFIG_SB8X8
-const vp9_tree_index vp9_i8x8_mode_tree[VP9_I8X8_MODES * 2 - 2] = {
-  2, 14,
-  -DC_PRED, 4,
-  6, 8,
-  -D45_PRED, -D135_PRED,
-  10, 12,
-  -D117_PRED, -D153_PRED,
-  -D27_PRED, -D63_PRED,
-  -V_PRED, 16,
-  -H_PRED, -TM_PRED
-};
-#endif
-
 const vp9_tree_index vp9_uv_mode_tree[VP9_UV_MODES * 2 - 2] = {
   2, 14,
   -DC_PRED, 4,
@@ -312,14 +226,6 @@ const vp9_tree_index vp9_uv_mode_tree[VP9_UV_MODES * 2 - 2] = {
   -H_PRED, -TM_PRED
 };
 
-#if !CONFIG_SB8X8
-const vp9_tree_index vp9_mbsplit_tree[6] = {
-  -PARTITIONING_4X4,   2,
-  -PARTITIONING_8X8,   4,
-  -PARTITIONING_16X8, -PARTITIONING_8X16,
-};
-#endif
-
 const vp9_tree_index vp9_mv_ref_tree[8] = {
   -ZEROMV, 2,
   -NEARESTMV, 4,
@@ -352,10 +258,6 @@ struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES];
 struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
 struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES];
 struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES];
-#if !CONFIG_SB8X8
-struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
-struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
-#endif
 
 struct vp9_token vp9_mv_ref_encoding_array[VP9_MVREFS];
 struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_MVREFS];
@@ -386,16 +288,8 @@ void vp9_init_mbmode_probs(VP9_COMMON *x) {
                                      bct, uv_mode_cts[i], 0);
   }
 
-#if !CONFIG_SB8X8
-  vp9_tree_probs_from_distribution(vp9_i8x8_mode_tree, x->fc.i8x8_mode_prob,
-                                   bct, i8x8_mode_cts, 0);
-#endif
-
   vpx_memcpy(x->fc.sub_mv_ref_prob, vp9_sub_mv_ref_prob2,
              sizeof(vp9_sub_mv_ref_prob2));
-#if !CONFIG_SB8X8
-  vpx_memcpy(x->fc.mbsplit_prob, vp9_mbsplit_probs, sizeof(vp9_mbsplit_probs));
-#endif
   vpx_memcpy(x->fc.switchable_interp_prob, vp9_switchable_interp_prob,
              sizeof(vp9_switchable_interp_prob));
 
@@ -499,10 +393,6 @@ void vp9_entropy_mode_init() {
   vp9_tokens_from_tree(vp9_sb_ymode_encodings, vp9_sb_ymode_tree);
   vp9_tokens_from_tree(vp9_sb_kf_ymode_encodings, vp9_sb_kf_ymode_tree);
   vp9_tokens_from_tree(vp9_uv_mode_encodings,  vp9_uv_mode_tree);
-#if !CONFIG_SB8X8
-  vp9_tokens_from_tree(vp9_i8x8_mode_encodings,  vp9_i8x8_mode_tree);
-  vp9_tokens_from_tree(vp9_mbsplit_encodings, vp9_mbsplit_tree);
-#endif
   vp9_tokens_from_tree(vp9_switchable_interp_encodings,
                        vp9_switchable_interp_tree);
   vp9_tokens_from_tree(vp9_partition_encodings, vp9_partition_tree);
@@ -681,11 +571,6 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
   update_mode_probs(VP9_NKF_BINTRAMODES, vp9_bmode_tree,
                     fc->bmode_counts, fc->pre_bmode_prob,
                     fc->bmode_prob, 0);
-#if !CONFIG_SB8X8
-  update_mode_probs(VP9_I8X8_MODES,
-                    vp9_i8x8_mode_tree, fc->i8x8_mode_counts,
-                    fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob, 0);
-#endif
 
   for (i = 0; i < SUBMVREF_COUNT; ++i)
     update_mode_probs(VP9_SUBMVREFS,
@@ -693,11 +578,6 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
                       fc->pre_sub_mv_ref_prob[i], fc->sub_mv_ref_prob[i],
                       LEFT4X4);
 
-#if !CONFIG_SB8X8
-  update_mode_probs(VP9_NUMMBSPLITS, vp9_mbsplit_tree,
-                    fc->mbsplit_counts, fc->pre_mbsplit_prob,
-                    fc->mbsplit_prob, 0);
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
   if (cm->use_interintra) {
     int factor, interintra_prob, count;
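
The mode trees above follow libvpx's vp9_tree_index convention: two entries per internal node, a negative entry is a leaf (the negated token) and a non-negative entry is the offset of the next node pair. A sketch of a reader in the style of treed_read(), to show how the shortened vp9_ymode_tree now ends with I4X4_PRED opposite TM_PRED; read_bit stands in for the arithmetic-decoder call and is an assumption of this sketch:

    static int tree_read(const vp9_tree_index *tree, const vp9_prob *probs,
                         int (*read_bit)(vp9_prob p)) {
      vp9_tree_index i = 0;
      while ((i = tree[i + read_bit(probs[i >> 1])]) > 0)
        continue;              /* walk until a (negative) leaf is hit */
      return -i;               /* e.g. I4X4_PRED or TM_PRED at the last node */
    }
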
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index 24f988f2522dcb8c52932cc26bf10e4799f70adc..f49bb3b42a85259dd846b661425b89f0ec072dbd 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -15,9 +15,6 @@
 #include "vp9/common/vp9_treecoder.h"
 
 #define SUBMVREF_COUNT 5
-#if !CONFIG_SB8X8
-#define VP9_NUMMBSPLITS 4
-#endif
 
 #if CONFIG_COMP_INTERINTRA_PRED
 #define VP9_DEF_INTERINTRA_PROB 248
@@ -26,16 +23,6 @@
 #define SEPARATE_INTERINTRA_UV  0
 #endif
 
-#if !CONFIG_SB8X8
-typedef const int vp9_mbsplit[16];
-
-extern vp9_mbsplit vp9_mbsplits[VP9_NUMMBSPLITS];
-
-extern const int vp9_mbsplit_count[VP9_NUMMBSPLITS];    /* # of subsets */
-
-extern const vp9_prob vp9_mbsplit_probs[VP9_NUMMBSPLITS - 1];
-#endif
-
 extern int vp9_mv_cont(const int_mv *l, const int_mv *a);
 
 extern const vp9_prob vp9_sub_mv_ref_prob2[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
@@ -52,10 +39,6 @@ extern const vp9_tree_index  vp9_kf_ymode_tree[];
 extern const vp9_tree_index  vp9_uv_mode_tree[];
 #define vp9_sb_ymode_tree vp9_uv_mode_tree
 #define vp9_sb_kf_ymode_tree vp9_uv_mode_tree
-#if !CONFIG_SB8X8
-extern const vp9_tree_index  vp9_i8x8_mode_tree[];
-extern const vp9_tree_index  vp9_mbsplit_tree[];
-#endif
 extern const vp9_tree_index  vp9_mv_ref_tree[];
 extern const vp9_tree_index  vp9_sb_mv_ref_tree[];
 extern const vp9_tree_index  vp9_sub_mv_ref_tree[];
@@ -67,10 +50,6 @@ extern struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES];
 extern struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
 extern struct vp9_token vp9_kf_ymode_encodings[VP9_YMODES];
 extern struct vp9_token vp9_uv_mode_encodings[VP9_UV_MODES];
-#if !CONFIG_SB8X8
-extern struct vp9_token vp9_i8x8_mode_encodings[VP9_I8X8_MODES];
-extern struct vp9_token vp9_mbsplit_encodings[VP9_NUMMBSPLITS];
-#endif
 
 /* Inter mode values do not start at zero */
 
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 3f00ba496dc0a8d2ab2a91e2e94993037221f4fc..51454c14ac91b739fa6ac903812eef276867b238 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -13,22 +13,16 @@
 
 #include "./vpx_config.h"
 
-#if CONFIG_SB8X8
 #define LOG2_MI_SIZE 3
-#else
-#define LOG2_MI_SIZE 4
-#endif
 
 #define MI_SIZE (1 << LOG2_MI_SIZE)
 #define MI_UV_SIZE (1 << (LOG2_MI_SIZE - 1))
 
 typedef enum BLOCK_SIZE_TYPE {
   BLOCK_SIZE_AB4X4,
-#if CONFIG_SB8X8
   BLOCK_SIZE_SB8X8,
   BLOCK_SIZE_SB8X16,
   BLOCK_SIZE_SB16X8,
-#endif
   BLOCK_SIZE_MB16X16,
   BLOCK_SIZE_SB16X32,
   BLOCK_SIZE_SB32X16,
@@ -47,6 +41,6 @@ typedef enum PARTITION_TYPE {
 } PARTITION_TYPE;
 
 #define PARTITION_PLOFFSET   4  // number of probability models per block size
-#define NUM_PARTITION_CONTEXTS ((2 + CONFIG_SB8X8) * PARTITION_PLOFFSET)
+#define NUM_PARTITION_CONTEXTS (3 * PARTITION_PLOFFSET)
 
 #endif  // VP9_COMMON_VP9_ENUMS_H_
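
For reference, the values this now pins down: a MODE_INFO unit is 8 luma (4 chroma) pixels, and there are three splittable square sizes above 8x8 (16x16, 32x32, 64x64), each with PARTITION_PLOFFSET probability models, which is why vp9_partition_probs in vp9_entropymode.c gained four extra rows. A small illustrative check, assuming this header is included:

    #include <assert.h>

    static void check_mi_constants(void) {
      assert(MI_SIZE == 8);
      assert(MI_UV_SIZE == 4);
      assert(NUM_PARTITION_CONTEXTS == 12);  /* 3 sizes * PARTITION_PLOFFSET */
    }
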
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index df1ab73e87f14a0f5adcc02b3178d5c4fbd827de..0a1c4133b54a9f7005c953603097cc2ccd37ca8e 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -74,13 +74,9 @@ vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
                            vp9_prob p[VP9_MVREFS - 1],
                            const int context);
 
-#if !CONFIG_SB8X8
-extern const uint8_t vp9_mbsplit_offset[4][16];
-#endif
-
 static int left_block_mv(const MACROBLOCKD *xd,
                          const MODE_INFO *cur_mb, int b) {
-  if (!(b & (3 >> CONFIG_SB8X8))) {
+  if (!(b & 1)) {
     if (!xd->left_available)
       return 0;
 
@@ -90,7 +86,7 @@ static int left_block_mv(const MACROBLOCKD *xd,
     if (cur_mb->mbmi.mode != SPLITMV)
       return cur_mb->mbmi.mv[0].as_int;
 
-    b += 4 >> CONFIG_SB8X8;
+    b += 2;
   }
 
   return (cur_mb->bmi + b - 1)->as_mv[0].as_int;
@@ -98,7 +94,7 @@ static int left_block_mv(const MACROBLOCKD *xd,
 
 static int left_block_second_mv(const MACROBLOCKD *xd,
                                 const MODE_INFO *cur_mb, int b) {
-  if (!(b & (3 >> CONFIG_SB8X8))) {
+  if (!(b & 1)) {
     if (!xd->left_available)
       return 0;
 
@@ -108,7 +104,7 @@ static int left_block_second_mv(const MACROBLOCKD *xd,
     if (cur_mb->mbmi.mode != SPLITMV)
       return cur_mb->mbmi.second_ref_frame > 0 ?
           cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int;
-    b += 4 >> CONFIG_SB8X8;
+    b += 2;
   }
 
   return cur_mb->mbmi.second_ref_frame > 0 ?
@@ -117,85 +113,69 @@ static int left_block_second_mv(const MACROBLOCKD *xd,
 }
 
 static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
-  if (!(b >> (2 >> CONFIG_SB8X8))) {
+  if (!(b >> 1)) {
     /* On top edge, get from MB above us */
     cur_mb -= mi_stride;
 
     if (cur_mb->mbmi.mode != SPLITMV)
       return cur_mb->mbmi.mv[0].as_int;
-    b += 16 >> (2 * CONFIG_SB8X8);
+    b += 4;
   }
 
-  return (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mv[0].as_int;
+  return (cur_mb->bmi + b - 2)->as_mv[0].as_int;
 }
 
 static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
-  if (!(b >> (2 >> CONFIG_SB8X8))) {
+  if (!(b >> 1)) {
     /* On top edge, get from MB above us */
     cur_mb -= mi_stride;
 
     if (cur_mb->mbmi.mode != SPLITMV)
       return cur_mb->mbmi.second_ref_frame > 0 ?
           cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int;
-    b += 16 >> (2 * CONFIG_SB8X8);
+    b += 4;
   }
 
   return cur_mb->mbmi.second_ref_frame > 0 ?
-      (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mv[1].as_int :
-      (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mv[0].as_int;
+      (cur_mb->bmi + b - 2)->as_mv[1].as_int :
+      (cur_mb->bmi + b - 2)->as_mv[0].as_int;
 }
 
 static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
-#if CONFIG_SB8X8
   // FIXME(rbultje, jingning): temporary hack because jenkins doesn't
   // understand this condition. This will go away soon.
   if (b == 0 || b == 2) {
-#else
-  if (!(b & (3 >> CONFIG_SB8X8))) {
-#endif
     /* On L edge, get from MB to left of us */
     --cur_mb;
 
     if (cur_mb->mbmi.mode <= TM_PRED) {
       return pred_mode_conv(cur_mb->mbmi.mode);
-#if !CONFIG_SB8X8
-    } else if (cur_mb->mbmi.mode == I8X8_PRED) {
-      return pred_mode_conv(
-          (MB_PREDICTION_MODE)(cur_mb->bmi + 3 + b)->as_mode.first);
-#endif  // !CONFIG_SB8X8
     } else if (cur_mb->mbmi.mode == I4X4_PRED) {
-      return ((cur_mb->bmi + (3 >> CONFIG_SB8X8) + b)->as_mode.first);
+      return ((cur_mb->bmi + 1 + b)->as_mode.first);
     } else {
       return B_DC_PRED;
     }
   }
-#if CONFIG_SB8X8
   assert(b == 1 || b == 3);
-#endif
   return (cur_mb->bmi + b - 1)->as_mode.first;
 }
 
 static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
                                           int b, int mi_stride) {
-  if (!(b >> (2 >> CONFIG_SB8X8))) {
+  if (!(b >> 1)) {
     /* On top edge, get from MB above us */
     cur_mb -= mi_stride;
 
     if (cur_mb->mbmi.mode <= TM_PRED) {
       return pred_mode_conv(cur_mb->mbmi.mode);
-#if !CONFIG_SB8X8
-    } else if (cur_mb->mbmi.mode == I8X8_PRED) {
-      return pred_mode_conv(
-          (MB_PREDICTION_MODE)(cur_mb->bmi + 12 + b)->as_mode.first);
-#endif
     } else if (cur_mb->mbmi.mode == I4X4_PRED) {
-      return ((cur_mb->bmi + (CONFIG_SB8X8 ? 2 : 12) + b)->as_mode.first);
+      return ((cur_mb->bmi + 2 + b)->as_mode.first);
     } else {
       return B_DC_PRED;
     }
   }
 
-  return (cur_mb->bmi + b - (4 >> CONFIG_SB8X8))->as_mode.first;
+  return (cur_mb->bmi + b - 2)->as_mode.first;
 }
 
 #endif  // VP9_COMMON_VP9_FINDNEARMV_H_
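
The new index arithmetic in left_block_mv(), above_block_mv() and the left/above block-mode lookups follows from bmi[] being a 2x2 raster-order grid of 4x4 blocks inside one 8x8 MODE_INFO unit. A sketch of the neighbour lookup; the helper names are invented for illustration and are not part of the patch:

    /* bmi[] layout inside one MODE_INFO unit:
     *   +---+---+
     *   | 0 | 1 |
     *   +---+---+
     *   | 2 | 3 |
     *   +---+---+
     */
    static int left_bmi_index(int b) {
      /* odd b: left neighbour in the same unit; even b: right column of
       * the MODE_INFO to the left (cur_mb - 1), same row */
      return (b & 1) ? b - 1 : b + 1;
    }

    static int above_bmi_index(int b) {
      /* bottom row: one row up in the same unit; top row: bottom row of
       * the MODE_INFO above (cur_mb - mi_stride), same column */
      return (b >> 1) ? b - 2 : b + 2;
    }
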
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index edb0c540b7effbb5766035c740b7d74abfb1a7f2..022abb8aad066d6089e00e4b98e4c4617d1af599 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -27,9 +27,6 @@ static void lf_init_lut(loop_filter_info_n *lfi) {
   lfi->mode_lf_lut[H_PRED] = 1;
   lfi->mode_lf_lut[TM_PRED] = 1;
   lfi->mode_lf_lut[I4X4_PRED]  = 0;
-#if !CONFIG_SB8X8
-  lfi->mode_lf_lut[I8X8_PRED] = 0;
-#endif
   lfi->mode_lf_lut[ZEROMV]  = 1;
   lfi->mode_lf_lut[NEARESTMV] = 2;
   lfi->mode_lf_lut[NEARMV] = 2;
@@ -169,12 +166,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm,
 static int mb_lf_skip(const MB_MODE_INFO *const mbmi) {
   const int skip_coef = mbmi->mb_skip_coeff;
   const int tx_size = mbmi->txfm_size;
-#if CONFIG_SB8X8
   return mbmi->sb_type >= BLOCK_SIZE_MB16X16 &&
-#else
-  const MB_PREDICTION_MODE mode = mbmi->mode;
-  return mode != I4X4_PRED && mode != I8X8_PRED && mode != SPLITMV &&
-#endif
          (tx_size >= TX_16X16 || skip_coef);
 }
 
@@ -227,11 +219,7 @@ static void lpf_mb(VP9_COMMON *cm, const MODE_INFO *mi,
       if (!skip_lf) {
         if (tx_size >= TX_8X8) {
           if (tx_size == TX_8X8 &&
-#if CONFIG_SB8X8
               (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
-#else
-              (mode == I8X8_PRED || mode == SPLITMV)
-#endif
               )
             vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr,
                                   y_stride, uv_stride, &lfi);
@@ -257,12 +245,7 @@ static void lpf_mb(VP9_COMMON *cm, const MODE_INFO *mi,
       if (!skip_lf) {
         if (tx_size >= TX_8X8) {
           if (tx_size == TX_8X8 &&
-#if CONFIG_SB8X8
-              (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
-#else
-              (mode == I8X8_PRED || mode == SPLITMV)
-#endif
-              )
+              (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16))
             vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr,
                                   y_stride, uv_stride, &lfi);
           else
@@ -322,7 +305,7 @@ static void lpf_sb32(VP9_COMMON *cm, const MODE_INFO *mode_info_context,
       y_only? 0 : v_ptr,
       y_stride, uv_stride, dering);
   // process 2nd MB top-right
-  mi = mode_info_context + (1 << CONFIG_SB8X8);
+  mi = mode_info_context + 2;
   do_left_v = !(wbl >= 3 /* 32x16 or >=32x32 */ && (tx_size >= TX_32X32 ||
       sb_mb_lf_skip(mode_info_context, mi)));
   do_above_h = (mb_row > 0);
@@ -338,7 +321,7 @@ static void lpf_sb32(VP9_COMMON *cm, const MODE_INFO *mode_info_context,
       y_stride, uv_stride, dering);
 
   // process 3rd MB bottom-left
-  mi = mode_info_context + (mis << CONFIG_SB8X8);
+  mi = mode_info_context + (mis << 1);
   do_left_v = (mb_col > 0);
   do_above_h = !(hbl >= 3 /* 16x32 or >=32x32 */ && (tx_size >= TX_32X32 ||
       sb_mb_lf_skip(mode_info_context, mi)));
@@ -354,15 +337,15 @@ static void lpf_sb32(VP9_COMMON *cm, const MODE_INFO *mode_info_context,
       y_stride, uv_stride, dering);
 
   // process 4th MB bottom right
-  mi = mode_info_context + ((mis + 1) << CONFIG_SB8X8);
+  mi = mode_info_context + ((mis + 1) << 1);
   do_left_v = !(wbl >= 3 /* 32x16 or >=32x32 */ && (tx_size >= TX_32X32 ||
-      sb_mb_lf_skip(mi - (1 << CONFIG_SB8X8), mi)));
+      sb_mb_lf_skip(mi - 2, mi)));
   do_above_h = !(hbl >= 3 /* 16x32 or >=32x32 */ && (tx_size >= TX_32X32 ||
-      sb_mb_lf_skip(mode_info_context + (1 << CONFIG_SB8X8), mi)));
+      sb_mb_lf_skip(mode_info_context + 2, mi)));
   do_left_v_mbuv = (wbl >= 3 /* 32x16 or >=32x32 */ && (tx_size >= TX_16X16 ||
-      sb_mb_lf_skip(mi - (1 << CONFIG_SB8X8), mi)));
+      sb_mb_lf_skip(mi - 2, mi)));
   do_above_h_mbuv = !(hbl >= 3 /* 16x32 or >=32x32 */ && (tx_size >= TX_16X16 ||
-      sb_mb_lf_skip(mode_info_context + (1 << CONFIG_SB8X8), mi)));
+      sb_mb_lf_skip(mode_info_context + 2, mi)));
   lpf_mb(cm, mi, do_left_v, do_above_h,
       do_left_v_mbuv, do_above_h_mbuv,
       y_ptr + 16 * y_stride + 16,
@@ -379,17 +362,16 @@ static void lpf_sb64(VP9_COMMON *cm, const MODE_INFO *mode_info_context,
   lpf_sb32(cm, mode_info_context, mb_row, mb_col,
       y_ptr, u_ptr, v_ptr,
       y_stride, uv_stride, y_only, dering);
-  lpf_sb32(cm, mode_info_context + (2 << CONFIG_SB8X8), mb_row, mb_col + 2,
+  lpf_sb32(cm, mode_info_context + 4, mb_row, mb_col + 2,
       y_ptr + 32, u_ptr + 16, v_ptr + 16,
       y_stride, uv_stride, y_only, dering);
-  lpf_sb32(cm, mode_info_context + cm->mode_info_stride * (2 << CONFIG_SB8X8),
+  lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 4,
       mb_row + 2, mb_col,
       y_ptr + 32 * y_stride,
       u_ptr + 16 * uv_stride,
       v_ptr + 16 * uv_stride,
       y_stride, uv_stride, y_only, dering);
-  lpf_sb32(cm, mode_info_context + cm->mode_info_stride *
-      (2 << CONFIG_SB8X8) + (2 << CONFIG_SB8X8),
+  lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 4 + 4,
       mb_row + 2, mb_col + 2,
       y_ptr + 32 * y_stride + 32,
       u_ptr + 16 * uv_stride + 16,
@@ -459,14 +441,14 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
       y_ptr += 64;
       u_ptr = y_only? 0 : u_ptr + 32;
       v_ptr = y_only? 0 : v_ptr + 32;
-      mode_info_context += 4 << CONFIG_SB8X8;       // step to next SB64
+      mode_info_context += 8;       // step to next SB64
     }
     if (extra_sb32_col) {
       // process 2 SB32s in the extra SB32 col
       lpf_sb32(cm, mode_info_context, mb_row, mb_col,
                y_ptr, u_ptr, v_ptr,
                y_stride, uv_stride, y_only, dering);
-      lpf_sb32(cm, mode_info_context + mis * (2 << CONFIG_SB8X8),
+      lpf_sb32(cm, mode_info_context + mis * 4,
                mb_row + 2, mb_col,
                y_ptr + 32 * y_stride,
                u_ptr + 16 * uv_stride,
@@ -475,7 +457,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
       y_ptr += 32;
       u_ptr = y_only? 0 : u_ptr + 16;
       v_ptr = y_only? 0 : v_ptr + 16;
-      mode_info_context += 2 << CONFIG_SB8X8;       // step to next SB32
+      mode_info_context += 4;       // step to next SB32
       mb_col += 2;
     }
     if (extra_mb_col) {
@@ -493,7 +475,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
              y_only? 0 : v_ptr,
              y_stride, uv_stride, dering);
       // process 2nd MB
-      mi = mode_info_context + (mis << CONFIG_SB8X8);
+      mi = mode_info_context + (mis << 1);
       do_left_v = (mb_col > 0);
       do_above_h = 1;
       do_left_v_mbuv =  1;
@@ -505,7 +487,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
              y_only ? 0 : (v_ptr + 8 * uv_stride),
              y_stride, uv_stride, dering);
       // process 3rd MB
-      mi = mode_info_context + (mis << CONFIG_SB8X8) * 2;
+      mi = mode_info_context + (mis << 1) * 2;
       do_left_v = (mb_col > 0);
       do_above_h = 1;
       do_left_v_mbuv =  1;
@@ -517,7 +499,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
              y_only ? 0 : (v_ptr + 16 * uv_stride),
              y_stride, uv_stride, dering);
       // process 4th MB
-      mi = mode_info_context + (mis << CONFIG_SB8X8) * 3;
+      mi = mode_info_context + (mis << 1) * 3;
       do_left_v = (mb_col > 0);
       do_above_h = 1;
       do_left_v_mbuv =  1;
@@ -531,7 +513,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
       y_ptr += 16;
       u_ptr = y_only? 0 : u_ptr + 8;
       v_ptr = y_only? 0 : v_ptr + 8;
-      mode_info_context += 1 << CONFIG_SB8X8;       // step to next MB
+      mode_info_context += 2;       // step to next MB
     }
     // move pointers to the beginning of next sb64 row
     y_ptr += y_stride  * 64 - post->y_width;
@@ -540,7 +522,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
       v_ptr += uv_stride *  32 - post->uv_width;
     }
     /* skip to next SB64 row */
-    mode_info_context += mis * (4 << CONFIG_SB8X8) - cm->mi_cols;
+    mode_info_context += mis * 8 - cm->mi_cols;
   }
   if (extra_sb32_row) {
     const int sb32_cols = sb64_cols * 2 + extra_sb32_col;
@@ -551,7 +533,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
       y_ptr += 32;
       u_ptr = y_only? 0 : u_ptr + 16;
       v_ptr = y_only? 0 : v_ptr + 16;
-      mode_info_context += 2 << CONFIG_SB8X8;       // step to next SB32
+      mode_info_context += 4;       // step to next SB32
     }
     if (extra_mb_col) {
       // process 1st MB
@@ -567,7 +549,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
              y_only? NULL : v_ptr,
              y_stride, uv_stride, dering);
       // process 2nd MB
-      mi = mode_info_context + (mis << CONFIG_SB8X8);
+      mi = mode_info_context + (mis << 1);
       do_left_v = (mb_col > 0);
       do_above_h = 1;
       do_left_v_mbuv =  1;
@@ -581,14 +563,14 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
       y_ptr += 16;
       u_ptr = y_only? 0 : u_ptr + 8;
       v_ptr = y_only? 0 : v_ptr + 8;
-      mode_info_context += 1 << CONFIG_SB8X8;       /* step to next MB */
+      mode_info_context += 2;       /* step to next MB */
     }
     // move pointers to the beginning of next sb64 row
     y_ptr += y_stride * 32 - post->y_width;
     u_ptr += y_only? 0 : uv_stride *  16 - post->uv_width;
     v_ptr += y_only? 0 : uv_stride *  16 - post->uv_width;
     // skip to next MB row if one exists
-    mode_info_context += mis * (2 << CONFIG_SB8X8) - cm->mi_cols;
+    mode_info_context += mis * 4 - cm->mi_cols;
     mb_row += 2;
   }
   if (extra_mb_row) {
@@ -607,7 +589,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
       y_ptr += 16;
       u_ptr = y_only? 0 : u_ptr + 8;
       v_ptr = y_only? 0 : v_ptr + 8;
-      mode_info_context += 1 << CONFIG_SB8X8;     // step to next MB
+      mode_info_context += 2;     // step to next MB
     }
   }
 }
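
All of the pointer stepping above reduces to one MODE_INFO unit covering 8 pixels: a 16x16 macroblock is 2 MI units across, a 32x32 superblock 4, and a 64x64 superblock 8. A minimal sketch of those constants; the enum names are invented here (the code above uses the literals directly) and LOG2_MI_SIZE comes from vp9/common/vp9_enums.h:

    enum {
      MI_PER_MB16 = 16 >> LOG2_MI_SIZE,  /* 2: "mode_info_context += 2" per MB   */
      MI_PER_SB32 = 32 >> LOG2_MI_SIZE,  /* 4: "mode_info_context += 4" per SB32 */
      MI_PER_SB64 = 64 >> LOG2_MI_SIZE   /* 8: "mode_info_context += 8" per SB64 */
    };
    /* vertical steps scale by the stride, e.g. "mode_info_context + (mis << 1)"
     * is one macroblock row down. */
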
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 7a7ebe64f9bbd992557cf94acd1640f2e6674ab6..3f18c6961fafda22e74f8b172be43bd979e57d7d 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -12,7 +12,6 @@
 
 #define MVREF_NEIGHBOURS 8
 
-#if CONFIG_SB8X8
 static int b_mv_ref_search[MVREF_NEIGHBOURS][2] = {
   {0, -1}, {-1, 0}, {-1, -1}, {0, -2},
   {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}
@@ -32,22 +31,6 @@ static int sb64_mv_ref_search[MVREF_NEIGHBOURS][2] = {
     {0, -1}, {-1, 0}, {2, -1}, {-1,  2},
     {4, -1}, {-1, 4}, {6, -1}, {-1, -1}
 };
-#else
-static int mb_mv_ref_search[MVREF_NEIGHBOURS][2] = {
-  {0, -1}, {-1, 0}, {-1, -1}, {0, -2},
-  {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}
-};
-
-static int sb_mv_ref_search[MVREF_NEIGHBOURS][2] = {
-  {0, -1}, {-1, 0}, {1, -1}, {-1, 1},
-  {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}
-};
-
-static int sb64_mv_ref_search[MVREF_NEIGHBOURS][2] = {
-  {0, -1}, {-1, 0}, {1, -1}, {-1,  1},
-  {2, -1}, {-1, 2}, {3, -1}, {-1, -1}
-};
-#endif
 
 // clamp_mv_ref
 #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
@@ -190,15 +173,10 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
     mv_ref_search = sb64_mv_ref_search;
   } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32) {
     mv_ref_search = sb_mv_ref_search;
-#if CONFIG_SB8X8
   } else if (mbmi->sb_type >= BLOCK_SIZE_MB16X16) {
     mv_ref_search = mb_mv_ref_search;
   } else {
     mv_ref_search = b_mv_ref_search;
-#else
-  } else {
-    mv_ref_search = mb_mv_ref_search;
-#endif
   }
 
   // We first scan for candidate vectors that match the current reference frame
@@ -208,7 +186,7 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
 
     if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
         (mi_search_col < cm->cur_tile_mi_col_end) &&
-        ((mv_ref_search[i][1] << (7 - CONFIG_SB8X8)) >= xd->mb_to_top_edge)) {
+        ((mv_ref_search[i][1] << 6) >= xd->mb_to_top_edge)) {
 
       candidate_mi = here + mv_ref_search[i][0] +
                      (mv_ref_search[i][1] * xd->mode_info_stride);
@@ -228,7 +206,7 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
 
     if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
         (mi_search_col < cm->cur_tile_mi_col_end) &&
-        ((mv_ref_search[i][1] << (7 - CONFIG_SB8X8)) >= xd->mb_to_top_edge)) {
+        ((mv_ref_search[i][1] << 6) >= xd->mb_to_top_edge)) {
       candidate_mi = here + mv_ref_search[i][0] +
                      (mv_ref_search[i][1] * xd->mode_info_stride);
 
@@ -258,7 +236,7 @@ void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
 
     if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
         (mi_search_col < cm->cur_tile_mi_col_end) &&
-        ((mv_ref_search[i][1] << (7 - CONFIG_SB8X8)) >= xd->mb_to_top_edge)) {
+        ((mv_ref_search[i][1] << 6) >= xd->mb_to_top_edge)) {
       candidate_mi = here + mv_ref_search[i][0] +
                      (mv_ref_search[i][1] * xd->mode_info_stride);
 
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 1538a009b637d9cb31c8a1b7834cac750e3396bc..fe6e14ab545f998430a8e6428411605314f4bac2 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -55,13 +55,7 @@ typedef struct frame_contexts {
   vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
   vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
   vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
-#if !CONFIG_SB8X8
-  vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
-#endif
   vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
-#if !CONFIG_SB8X8
-  vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
-#endif
   vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
 
   vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
@@ -81,25 +75,13 @@ typedef struct frame_contexts {
   vp9_prob pre_ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
   vp9_prob pre_sb_ymode_prob[VP9_I32X32_MODES - 1];
   vp9_prob pre_uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
-#if !CONFIG_SB8X8
-  vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1];
-#endif
   vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
-#if !CONFIG_SB8X8
-  vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1];
-#endif
   vp9_prob pre_partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
   unsigned int bmode_counts[VP9_NKF_BINTRAMODES];
   unsigned int ymode_counts[VP9_YMODES];   /* interframe intra mode probs */
   unsigned int sb_ymode_counts[VP9_I32X32_MODES];
   unsigned int uv_mode_counts[VP9_YMODES][VP9_UV_MODES];
-#if !CONFIG_SB8X8
-  unsigned int i8x8_mode_counts[VP9_I8X8_MODES];   /* interframe intra probs */
-#endif
   unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS];
-#if !CONFIG_SB8X8
-  unsigned int mbsplit_counts[VP9_NUMMBSPLITS];
-#endif
   unsigned int partition_counts[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 
   vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES];
@@ -204,8 +186,7 @@ typedef struct VP9Common {
 
   int frame_flags;
   // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
-  // MODE_INFO units (depending on CONFIG_SB8X8, that is either
-  // 16-pixel or 8-pixel)
+  // MODE_INFO (8-pixel) units.
   int MBs;
   int mb_rows, mi_rows;
   int mb_cols, mi_cols;
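
For orientation: mb_rows/mb_cols count 16-pixel macroblocks while mi_rows/mi_cols count 8-pixel MODE_INFO units, so the mi grid is twice as dense in each direction and one macroblock maps to a 2x2 block of MODE_INFO entries. A rough sketch of the relationship; the rounding shown is an assumption for illustration, not taken from this patch:

    static void grid_sizes(int width, int *mb_cols, int *mi_cols) {
      *mb_cols = (width + 15) >> 4;            /* 16-pixel macroblock grid */
      *mi_cols = (width + 7) >> LOG2_MI_SIZE;  /* 8-pixel MODE_INFO grid   */
      /* rows follow the same pattern */
    }
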
diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c
index 6b102d1d384c1b96e0f2f909542548c47960138a..69a47201b4ac77f8d587c4bfbd48fee1cb9c0c64 100644
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@@ -34,26 +34,6 @@ void vp9_recon_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, int diff_stride,
   recon(4, 4, diff_ptr, diff_stride, dst_ptr, stride);
 }
 
-#if !CONFIG_SB8X8
-void vp9_recon_uv_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
-                      int stride) {
-  assert(pred_ptr == dst_ptr);
-  recon(4, 4, diff_ptr, 8, dst_ptr, stride);
-}
-
-void vp9_recon4b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
-                   int stride) {
-  assert(pred_ptr == dst_ptr);
-  recon(4, 16, diff_ptr, 16, dst_ptr, stride);
-}
-
-void vp9_recon2b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
-                   int stride) {
-  assert(pred_ptr == dst_ptr);
-  recon(4, 8, diff_ptr, 8, dst_ptr, stride);
-}
-#endif
-
 static void recon_plane(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, int plane) {
   const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
   const int bh = 4 << (b_height_log2(bsize) - xd->plane[plane].subsampling_y);
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 042006354d48a91a6521bf4d67bbd8116b431732..e39e050c2ab248848897191d7c5eb104dbe3e1b5 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -265,13 +265,8 @@ static INLINE int round_mv_comp_q4(int value) {
   return (value < 0 ? value - 2 : value + 2) / 4;
 }
 
-#if CONFIG_SB8X8
 #define IDX1 2
 #define IDX2 3
-#else
-#define IDX1 4
-#define IDX2 5
-#endif
 
 static int mi_mv_pred_row_q4(MACROBLOCKD *mb, int off, int idx) {
   const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.row +
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index aef34c9b1cfcfda396fae71e5d0449b14fd8889a..d01cfa4e6c78ad92df560e029fde64118cb0fd07 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -573,22 +573,6 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd,
                              xd->left_available, 0 /*xd->right_available*/);
 }
 
-#if !CONFIG_SB8X8
-void vp9_intra8x8_predict(MACROBLOCKD *xd,
-                          int block4x4_idx,
-                          int mode,
-                          uint8_t *predictor, int pre_stride) {
-  const int block_idx = (block4x4_idx >> 2) | !!(block4x4_idx & 2);
-  const int have_top = (block_idx >> 1) || xd->up_available;
-  const int have_left = (block_idx & 1) || xd->left_available;
-  const int have_right = !(block_idx & 1) || xd->right_available;
-
-  vp9_build_intra_predictors(predictor, pre_stride,
-                             predictor, pre_stride,
-                             mode, 8, 8, have_top, have_left,
-                             have_right);
-}
-#endif
 #if !CONFIG_NEWBINTRAMODES
 void vp9_intra4x4_predict(MACROBLOCKD *xd,
                           int block_idx,
@@ -609,19 +593,3 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd,
                              have_right);
 }
 #endif
-#if !CONFIG_SB8X8
-void vp9_intra_uv4x4_predict(MACROBLOCKD *xd,
-                             int block4x4_idx,
-                             int mode,
-                             uint8_t *predictor, int pre_stride) {
-  const int block_idx = block4x4_idx & 3;
-  const int have_top = (block_idx >> 1) || xd->up_available;
-  const int have_left = (block_idx & 1) || xd->left_available;
-  const int have_right = !(block_idx & 1);
-
-  vp9_build_intra_predictors(predictor, pre_stride,
-                             predictor, pre_stride,
-                             mode, 4, 4, have_top, have_left,
-                             have_right);
-}
-#endif
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index e473d81dabbd3601ecdea894a9737509430086e2..75e36040c9a38881ab514652fa76a3a4c5702c05 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -63,23 +63,6 @@ specialize vp9_copy_mem8x4 mmx
 prototype void vp9_recon_b "uint8_t *pred_ptr, int16_t *diff_ptr, int diff_stride, uint8_t *dst_ptr, int stride"
 specialize vp9_recon_b
 
-if [ "$CONFIG_SB8X8" != "yes" ]; then
-
-prototype void vp9_recon_uv_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
-specialize vp9_recon_uv_b
-
-# TODO(jingning): The prototype functions in c are modified to enable block-size configurable
-# operations. Need to change the sse2 accrodingly.
-prototype void vp9_recon2b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
-specialize vp9_recon2b
-# specialize vp9_recon2b sse2
-
-prototype void vp9_recon4b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
-specialize vp9_recon4b
-# specialize vp9_recon4b sse2
-
-fi
-
 prototype void vp9_recon_sb "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize"
 specialize vp9_recon_sb
 
@@ -101,16 +84,6 @@ specialize vp9_build_intra_predictors_sbuv_s
 prototype void vp9_intra4x4_predict "struct macroblockd *xd, int block, enum BLOCK_SIZE_TYPE bsize, int b_mode, uint8_t *predictor, int pre_stride"
 specialize vp9_intra4x4_predict;
 
-if [ "$CONFIG_SB8X8" != "yes" ]; then
-
-prototype void vp9_intra8x8_predict "struct macroblockd *xd, int block, int b_mode, uint8_t *predictor, int pre_stride"
-specialize vp9_intra8x8_predict;
-
-prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, int block, int b_mode, uint8_t *predictor, int pre_stride"
-specialize vp9_intra_uv4x4_predict;
-
-fi
-
 if [ "$CONFIG_VP9_DECODER" = "yes" ]; then
 prototype void vp9_add_residual_4x4 "const int16_t *diff, uint8_t *dest, int stride"
 specialize vp9_add_residual_4x4 sse2
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
index a9d8cf0eac79d0747c7b23bb4251a075eb3ea670..ea26289b76ec1dff6589244c683fcc1cc575c210 100644
--- a/vp9/common/vp9_tile_common.c
+++ b/vp9/common/vp9_tile_common.c
@@ -18,16 +18,12 @@
 static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
                                  int *max_tile_off, int tile_idx,
                                  int log2_n_tiles, int n_mis) {
-#if CONFIG_SB8X8
   const int n_sbs = (n_mis + 7) >> 3;
-#else
-  const int n_sbs = (n_mis + 3) >> 2;
-#endif
   const int sb_off1 =  (tile_idx      * n_sbs) >> log2_n_tiles;
   const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles;
 
-  *min_tile_off = MIN(sb_off1 << (2 + CONFIG_SB8X8), n_mis);
-  *max_tile_off = MIN(sb_off2 << (2 + CONFIG_SB8X8), n_mis);
+  *min_tile_off = MIN(sb_off1 << 3, n_mis);
+  *max_tile_off = MIN(sb_off2 << 3, n_mis);
 }
 
 void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx) {
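
Tile boundaries are still aligned to 64x64 superblocks; only the unit changed. With 8-pixel MI units a superblock spans 8 of them, hence the round-up by 7 and the shifts by 3. A worked example with illustrative numbers:

    /* a 1920-pixel-wide frame has n_mis = 240 MI columns, so
     *   n_sbs = (240 + 7) >> 3 = 30 superblock columns
     * with two tile columns (log2_n_tiles = 1):
     *   tile 0: sb_off1 = 0,  sb_off2 = 15  ->  MI columns [0, 120)
     *   tile 1: sb_off1 = 15, sb_off2 = 30  ->  MI columns [120, 240) */
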
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index c6632443277f82eb6854b0b9c0a6db3694afab10..6eaa4d5425fc76d6e9ac8d1b888b9e6933859a94 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -65,12 +65,6 @@ static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *r, const vp9_prob *p) {
   return (MB_PREDICTION_MODE)treed_read(r, vp9_kf_ymode_tree, p);
 }
 
-#if !CONFIG_SB8X8
-static int read_i8x8_mode(vp9_reader *r, const vp9_prob *p) {
-  return treed_read(r, vp9_i8x8_mode_tree, p);
-}
-#endif
-
 static MB_PREDICTION_MODE read_uv_mode(vp9_reader *r, const vp9_prob *p) {
   return (MB_PREDICTION_MODE)treed_read(r, vp9_uv_mode_tree, p);
 }
@@ -130,11 +124,7 @@ static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m,
     m->mbmi.mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
 
   // luma mode
-#if CONFIG_SB8X8
   m->mbmi.mode = m->mbmi.sb_type > BLOCK_SIZE_SB8X8 ?
-#else
-  m->mbmi.mode = m->mbmi.sb_type > BLOCK_SIZE_MB16X16 ?
-#endif
       read_kf_sb_ymode(r, cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]):
       read_kf_mb_ymode(r, cm->kf_ymode_prob[cm->kf_ymode_probs_index]);
 
@@ -142,58 +132,27 @@ static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m,
 
   if (m->mbmi.mode == I4X4_PRED) {
     int i;
-    for (i = 0; i < (16 >> (2 * CONFIG_SB8X8)); ++i) {
+    for (i = 0; i < 4; ++i) {
       const B_PREDICTION_MODE a = above_block_mode(m, i, mis);
       const B_PREDICTION_MODE l = xd->left_available ||
-                                  (i & (3 >> CONFIG_SB8X8)) ?
+                                  (i & 1) ?
                                   left_block_mode(m, i) : B_DC_PRED;
       m->bmi[i].as_mode.first = read_kf_bmode(r, cm->kf_bmode_prob[a][l]);
     }
   }
 
-#if !CONFIG_SB8X8
-  if (m->mbmi.mode == I8X8_PRED) {
-    int i;
-    for (i = 0; i < 4; ++i) {
-      const int ib = vp9_i8x8_block[i];
-      const int mode8x8 = read_i8x8_mode(r, cm->fc.i8x8_mode_prob);
-
-      m->bmi[ib + 0].as_mode.first = mode8x8;
-      m->bmi[ib + 1].as_mode.first = mode8x8;
-      m->bmi[ib + 4].as_mode.first = mode8x8;
-      m->bmi[ib + 5].as_mode.first = mode8x8;
-    }
-  }
-
-  // chroma mode
-  if (m->mbmi.mode != I8X8_PRED)
-#endif
-  {
-    m->mbmi.uv_mode = read_uv_mode(r, cm->kf_uv_mode_prob[m->mbmi.mode]);
-  }
+  m->mbmi.uv_mode = read_uv_mode(r, cm->kf_uv_mode_prob[m->mbmi.mode]);
 
   if (cm->txfm_mode == TX_MODE_SELECT &&
-      !m->mbmi.mb_skip_coeff &&
-#if CONFIG_SB8X8
-      m->mbmi.mode != I4X4_PRED
-#else
-      m->mbmi.mode <= I8X8_PRED
-#endif
-      ) {
-#if CONFIG_SB8X8
+      !m->mbmi.mb_skip_coeff && m->mbmi.mode != I4X4_PRED) {
     const int allow_16x16 = m->mbmi.sb_type >= BLOCK_SIZE_MB16X16;
-#else
-    const int allow_16x16 = m->mbmi.mode != I8X8_PRED;
-#endif
     const int allow_32x32 = m->mbmi.sb_type >= BLOCK_SIZE_SB32X32;
     m->mbmi.txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32);
   } else if (cm->txfm_mode >= ALLOW_32X32 &&
              m->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
     m->mbmi.txfm_size = TX_32X32;
   } else if (cm->txfm_mode >= ALLOW_16X16 &&
-#if CONFIG_SB8X8
              m->mbmi.sb_type >= BLOCK_SIZE_MB16X16 &&
-#endif
              m->mbmi.mode <= TM_PRED) {
     m->mbmi.txfm_size = TX_16X16;
   } else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != I4X4_PRED) {
@@ -677,12 +636,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
       if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
         mbmi->mode = ZEROMV;
       } else {
-        mbmi->mode =
-#if CONFIG_SB8X8
-                     mbmi->sb_type > BLOCK_SIZE_SB8X8 ?
-#else
-                     mbmi->sb_type > BLOCK_SIZE_MB16X16 ?
-#endif
+        mbmi->mode = mbmi->sb_type > BLOCK_SIZE_SB8X8 ?
                                      read_sb_mv_ref(r, mv_ref_p)
                                    : read_mv_ref(r, mv_ref_p);
         vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref_frame]);
@@ -776,29 +730,16 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     mbmi->uv_mode = DC_PRED;
     switch (mbmi->mode) {
       case SPLITMV: {
-#if CONFIG_SB8X8
         const int num_p = 4;
-#else
-        const int s = treed_read(r, vp9_mbsplit_tree, cm->fc.mbsplit_prob);
-        const int num_p = vp9_mbsplit_count[s];
-#endif
         int j = 0;
 
-#if !CONFIG_SB8X8
-        cm->fc.mbsplit_counts[s]++;
-        mbmi->partitioning = s;
-#endif
         mbmi->need_to_clamp_mvs = 0;
         do {  // for each subset j
           int_mv leftmv, abovemv, second_leftmv, second_abovemv;
           int_mv blockmv, secondmv;
           int mv_contz;
           int blockmode;
-#if CONFIG_SB8X8
           int k = j;
-#else
-          int k = vp9_mbsplit_offset[s][j];  // first block in subset j
-#endif
 
           leftmv.as_int = left_block_mv(xd, mi, k);
           abovemv.as_int = above_block_mv(mi, k, mis);
@@ -852,50 +793,14 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
             default:
               break;
           }
-
-          /*  Commenting this section out, not sure why this was needed, and
-           *  there are mismatches with this section in rare cases since it is
-           *  not done in the encoder at all.
-          mbmi->need_to_clamp_mvs |= check_mv_bounds(&blockmv,
-                                                     mb_to_left_edge,
-                                                     mb_to_right_edge,
-                                                     mb_to_top_edge,
-                                                     mb_to_bottom_edge);
-          if (mbmi->second_ref_frame > 0) {
-            mbmi->need_to_clamp_mvs |= check_mv_bounds(&secondmv,
-                                                       mb_to_left_edge,
-                                                       mb_to_right_edge,
-                                                       mb_to_top_edge,
-                                                       mb_to_bottom_edge);
-          }
-          */
-
-#if CONFIG_SB8X8
           mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
           if (mbmi->second_ref_frame > 0)
             mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
-#else
-          {
-            /* Fill (uniform) modes, mvs of jth subset.
-             Must do it here because ensuing subsets can
-             refer back to us via "left" or "above". */
-            unsigned int fill_count = mbsplit_fill_count[s];
-            const uint8_t *fill_offset =
-                &mbsplit_fill_offset[s][j * fill_count];
-
-            do {
-              mi->bmi[*fill_offset].as_mv[0].as_int = blockmv.as_int;
-              if (mbmi->second_ref_frame > 0)
-                mi->bmi[*fill_offset].as_mv[1].as_int = secondmv.as_int;
-              fill_offset++;
-            } while (--fill_count);
-          }
-#endif
         } while (++j < num_p);
       }
 
-      mv0->as_int = mi->bmi[15 >> (2 * CONFIG_SB8X8)].as_mv[0].as_int;
-      mv1->as_int = mi->bmi[15 >> (2 * CONFIG_SB8X8)].as_mv[1].as_int;
+      mv0->as_int = mi->bmi[3].as_mv[0].as_int;
+      mv1->as_int = mi->bmi[3].as_mv[1].as_int;
 
       break;  /* done with SPLITMV */
 
@@ -960,12 +865,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     // required for left and above block mv
     mv0->as_int = 0;
 
-#if CONFIG_SB8X8
-    if (mbmi->sb_type > BLOCK_SIZE_SB8X8)
-#else
-    if (mbmi->sb_type > BLOCK_SIZE_MB16X16)
-#endif
-    {
+    if (mbmi->sb_type > BLOCK_SIZE_SB8X8) {
       mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
       cm->fc.sb_ymode_counts[mbmi->mode]++;
     } else {
@@ -983,28 +883,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
         if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
 #endif
         cm->fc.bmode_counts[m]++;
-      } while (++j < (16 >> (2 * CONFIG_SB8X8)));
+      } while (++j < 4);
     }
 
-#if !CONFIG_SB8X8
-    if (mbmi->mode == I8X8_PRED) {
-      int i;
-      for (i = 0; i < 4; i++) {
-        const int ib = vp9_i8x8_block[i];
-        const int mode8x8 = read_i8x8_mode(r, cm->fc.i8x8_mode_prob);
-
-        mi->bmi[ib + 0].as_mode.first = mode8x8;
-        mi->bmi[ib + 1].as_mode.first = mode8x8;
-        mi->bmi[ib + 4].as_mode.first = mode8x8;
-        mi->bmi[ib + 5].as_mode.first = mode8x8;
-        cm->fc.i8x8_mode_counts[mode8x8]++;
-      }
-    } else
-#endif
-    {
-      mbmi->uv_mode = read_uv_mode(r, cm->fc.uv_mode_prob[mbmi->mode]);
-      cm->fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++;
-    }
+    mbmi->uv_mode = read_uv_mode(r, cm->fc.uv_mode_prob[mbmi->mode]);
+    cm->fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++;
   }
   /*
   if (cm->current_video_frame == 1)
@@ -1012,44 +895,22 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     */
 
   if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
-      ((mbmi->ref_frame == INTRA_FRAME &&
-#if CONFIG_SB8X8
-        mbmi->mode != I4X4_PRED
-#else
-        mbmi->mode <= I8X8_PRED
-#endif
-        ) ||
-       (mbmi->ref_frame != INTRA_FRAME &&
-#if CONFIG_SB8X8
-        mbmi->mode != SPLITMV
-#else
-        !(mbmi->mode == SPLITMV && mbmi->partitioning == PARTITIONING_4X4)
-#endif
-        ))) {
-#if CONFIG_SB8X8
+      ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode != I4X4_PRED) ||
+       (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
     const int allow_16x16 = mbmi->sb_type >= BLOCK_SIZE_MB16X16;
-#else
-    const int allow_16x16 = mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV;
-#endif
     const int allow_32x32 = mbmi->sb_type >= BLOCK_SIZE_SB32X32;
     mbmi->txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32);
   } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 &&
              cm->txfm_mode >= ALLOW_32X32) {
     mbmi->txfm_size = TX_32X32;
   } else if (cm->txfm_mode >= ALLOW_16X16 &&
-#if CONFIG_SB8X8
              mbmi->sb_type >= BLOCK_SIZE_MB16X16 &&
-#endif
       ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) ||
        (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
     mbmi->txfm_size = TX_16X16;
   } else if (cm->txfm_mode >= ALLOW_8X8 &&
       (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == I4X4_PRED) &&
-       !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV
-#if !CONFIG_SB8X8
-         && mbmi->partitioning == PARTITIONING_4X4
-#endif
-         ))) {
+       !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV))) {
     mbmi->txfm_size = TX_8X8;
   } else {
     mbmi->txfm_size = TX_4X4;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 2457f79e184ffba7b20ca4b0d0922b5b517d6f8f..ca994e3e8cc77b88baf43f7f8c3bfacc64c07741 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -187,58 +187,6 @@ static void mb_init_dequantizer(VP9_COMMON *pc, MACROBLOCKD *xd) {
     xd->plane[i].dequant = pc->uv_dequant[xd->q_index];
 }
 
-#if !CONFIG_SB8X8
-static void decode_8x8(MACROBLOCKD *xd) {
-  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
-  // luma
-  // if the first one is DCT_DCT assume all the rest are as well
-  TX_TYPE tx_type = get_tx_type_8x8(xd, 0);
-  int i;
-  assert(mode == I8X8_PRED);
-  for (i = 0; i < 4; i++) {
-    int ib = vp9_i8x8_block[i];
-    int idx = (ib & 0x02) ? (ib + 2) : ib;
-    int16_t *q  = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16);
-    uint8_t* const dst =
-          raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                    xd->plane[0].dst.buf,
-                                    xd->plane[0].dst.stride);
-    int stride = xd->plane[0].dst.stride;
-    if (mode == I8X8_PRED) {
-      int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first;
-      vp9_intra8x8_predict(xd, ib, i8x8mode, dst, stride);
-    }
-    tx_type = get_tx_type_8x8(xd, ib);
-    vp9_iht_add_8x8_c(tx_type, q, dst, stride, xd->plane[0].eobs[idx]);
-  }
-
-  // chroma
-  for (i = 0; i < 4; i++) {
-    int ib = vp9_i8x8_block[i];
-    int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first;
-    uint8_t* dst;
-
-    dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 1, i,
-                                    xd->plane[1].dst.buf,
-                                    xd->plane[1].dst.stride);
-    vp9_intra_uv4x4_predict(xd, 16 + i, i8x8mode,
-                            dst, xd->plane[1].dst.stride);
-    xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
-                 dst, xd->plane[1].dst.stride,
-                 xd->plane[1].eobs[i]);
-
-    dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 2, i,
-                                    xd->plane[2].dst.buf,
-                                    xd->plane[1].dst.stride);
-    vp9_intra_uv4x4_predict(xd, 20 + i, i8x8mode,
-                            dst, xd->plane[1].dst.stride);
-    xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
-                 dst, xd->plane[1].dst.stride,
-                 xd->plane[2].eobs[i]);
-  }
-}
-#endif
-
 static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx,
                                  BLOCK_SIZE_TYPE bsize) {
   struct macroblockd_plane *const y = &xd->plane[0];
@@ -254,47 +202,6 @@ static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx,
   }
 }
 
-#if !CONFIG_SB8X8
-static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_reader *r) {
-  TX_TYPE tx_type;
-  int i = 0;
-  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
-  assert(mode == I8X8_PRED);
-  for (i = 0; i < 4; i++) {
-    int ib = vp9_i8x8_block[i];
-    const int iblock[4] = {0, 1, 4, 5};
-    int j;
-    uint8_t* dst;
-    int i8x8mode = xd->mode_info_context->bmi[ib].as_mode.first;
-
-    dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                    xd->plane[0].dst.buf,
-                                    xd->plane[0].dst.stride);
-    vp9_intra8x8_predict(xd, ib, i8x8mode, dst, xd->plane[0].dst.stride);
-    for (j = 0; j < 4; j++) {
-      tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
-      dequant_add_y(xd, tx_type, ib + iblock[j], BLOCK_SIZE_MB16X16);
-    }
-    dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 1, i,
-                                    xd->plane[1].dst.buf,
-                                    xd->plane[1].dst.stride);
-    vp9_intra_uv4x4_predict(xd, 16 + i, i8x8mode,
-                            dst, xd->plane[1].dst.stride);
-    xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
-                 dst, xd->plane[1].dst.stride,
-                 xd->plane[1].eobs[i]);
-    dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 2, i,
-                                    xd->plane[2].dst.buf,
-                                    xd->plane[2].dst.stride);
-    vp9_intra_uv4x4_predict(xd, 20 + i, i8x8mode,
-                            dst, xd->plane[1].dst.stride);
-    xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
-                 dst, xd->plane[1].dst.stride,
-                 xd->plane[2].eobs[i]);
-  }
-}
-#endif
-
 static void decode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                          int ss_txfrm_size, void *arg) {
   MACROBLOCKD* const xd = arg;
@@ -446,35 +353,6 @@ static void decode_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mi_row, int mi_col,
   }
 }
 
-#if !CONFIG_SB8X8
-// TODO(jingning): This only performs I8X8_PRED decoding process, which will be
-// automatically covered by decode_sb, when SB8X8 is on.
-static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
-                     int mi_row, int mi_col,
-                     vp9_reader *r) {
-  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
-  const int tx_size = mbmi->txfm_size;
-
-  assert(mbmi->sb_type == BLOCK_SIZE_MB16X16);
-
-  if (mbmi->mb_skip_coeff) {
-    vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
-  } else {
-    // re-initialize macroblock dequantizer before detokenization
-    if (xd->segmentation_enabled)
-      mb_init_dequantizer(&pbi->common, xd);
-
-    if (!vp9_reader_has_error(r))
-      vp9_decode_tokens(pbi, xd, r, BLOCK_SIZE_MB16X16);
-  }
-
-  if (tx_size == TX_8X8)
-    decode_8x8(xd);
-  else
-    decode_4x4(pbi, xd, r);
-}
-#endif
-
 static int get_delta_q(vp9_reader *r, int *dq) {
   const int old_value = *dq;
 
@@ -508,12 +386,12 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize,
 
   for (i = 0; i < MAX_MB_PLANE; i++) {
     xd->plane[i].above_context = cm->above_context[i] +
-        (mi_col * 4 >> (xd->plane[i].subsampling_x + CONFIG_SB8X8));
+        (mi_col * 2 >> xd->plane[i].subsampling_x);
     xd->plane[i].left_context = cm->left_context[i] +
-        (((mi_row * 4 >> CONFIG_SB8X8) & 15) >> xd->plane[i].subsampling_y);
+        (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
   }
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8);
-  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
+  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
+  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> 1) & 3);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
   // as they are always compared to values that are in 1/8th pel units
@@ -560,33 +438,12 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
   vp9_decode_mb_mode_mv(pbi, xd, mi_row, mi_col, r);
   set_refs(pbi, mi_row, mi_col);
 
-#if CONFIG_SB8X8
   if (bsize == BLOCK_SIZE_SB8X8 &&
       (xd->mode_info_context->mbmi.mode == SPLITMV ||
        xd->mode_info_context->mbmi.mode == I4X4_PRED))
     decode_atom(pbi, xd, mi_row, mi_col, r, bsize);
   else
     decode_sb(pbi, xd, mi_row, mi_col, r, bsize);
-#else
-  // TODO(jingning): merge decode_sb_ and decode_mb_
-  if (bsize > BLOCK_SIZE_MB16X16) {
-    decode_sb(pbi, xd, mi_row, mi_col, r, bsize);
-  } else {
-    // TODO(jingning): In transition of separating functionalities of decode_mb
-    // into decode_sb and decode_atom. Will remove decode_mb and clean this up
-    // when SB8X8 is on.
-    if (xd->mode_info_context->mbmi.mode == I4X4_PRED ||
-        (xd->mode_info_context->mbmi.mode == SPLITMV &&
-         xd->mode_info_context->mbmi.partitioning == PARTITIONING_4X4))
-      decode_atom(pbi, xd, mi_row, mi_col, r, bsize);
-    else if (xd->mode_info_context->mbmi.mode != I8X8_PRED)
-      decode_sb(pbi, xd, mi_row, mi_col, r, bsize);
-    else
-      // TODO(jingning): decode_mb still carries deocding process of I8X8_PRED.
-      // This will be covered by decode_sb when SB8X8 is on.
-      decode_mb(pbi, xd, mi_row, mi_col, r);
-  }
-#endif
 
   xd->corrupted |= vp9_reader_has_error(r);
 }
@@ -603,16 +460,12 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
   if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols)
     return;
 
-#if CONFIG_SB8X8
   if (bsize > BLOCK_SIZE_SB8X8) {
-#else
-  if (bsize > BLOCK_SIZE_MB16X16) {
-#endif
     int pl;
     // read the partition information
     xd->left_seg_context =
-        pc->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
-    xd->above_seg_context = pc->above_seg_context + (mi_col >> CONFIG_SB8X8);
+        pc->left_seg_context + ((mi_row >> 1) & 3);
+    xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
     pl = partition_plane_context(xd, bsize);
     partition = treed_read(r, vp9_partition_tree,
                            pc->fc.partition_prob[pl]);
@@ -639,15 +492,10 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
         int j = n >> 1, i = n & 0x01;
         if (subsize == BLOCK_SIZE_SB32X32)
           xd->sb_index = n;
-#if CONFIG_SB8X8
         else if (subsize == BLOCK_SIZE_MB16X16)
           xd->mb_index = n;
         else
           xd->b_index = n;
-#else
-        else
-          xd->mb_index = n;
-#endif
         decode_modes_sb(pbi, mi_row + j * bs, mi_col + i * bs, r, subsize);
       }
       break;
@@ -655,15 +503,11 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
       assert(0);
   }
   // update partition context
-#if CONFIG_SB8X8
   if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
-#else
-  if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32))
-#endif
     return;
 
-  xd->left_seg_context = pc->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
-  xd->above_seg_context = pc->above_seg_context + (mi_col >> CONFIG_SB8X8);
+  xd->left_seg_context = pc->left_seg_context + ((mi_row >> 1) & 3);
+  xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
   update_partition_context(xd, subsize, bsize);
 }
 
@@ -1014,13 +858,7 @@ static void update_frame_context(FRAME_CONTEXT *fc) {
   vp9_copy(fc->pre_sb_ymode_prob, fc->sb_ymode_prob);
   vp9_copy(fc->pre_uv_mode_prob, fc->uv_mode_prob);
   vp9_copy(fc->pre_bmode_prob, fc->bmode_prob);
-#if !CONFIG_SB8X8
-  vp9_copy(fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob);
-#endif
   vp9_copy(fc->pre_sub_mv_ref_prob, fc->sub_mv_ref_prob);
-#if !CONFIG_SB8X8
-  vp9_copy(fc->pre_mbsplit_prob, fc->mbsplit_prob);
-#endif
   vp9_copy(fc->pre_partition_prob, fc->partition_prob);
   fc->pre_nmvc = fc->nmvc;
 
@@ -1033,13 +871,7 @@ static void update_frame_context(FRAME_CONTEXT *fc) {
   vp9_zero(fc->sb_ymode_counts);
   vp9_zero(fc->uv_mode_counts);
   vp9_zero(fc->bmode_counts);
-#if !CONFIG_SB8X8
-  vp9_zero(fc->i8x8_mode_counts);
-#endif
   vp9_zero(fc->sub_mv_ref_counts);
-#if !CONFIG_SB8X8
-  vp9_zero(fc->mbsplit_counts);
-#endif
   vp9_zero(fc->NMVcount);
   vp9_zero(fc->mv_ref_ct);
   vp9_zero(fc->partition_counts);
@@ -1067,12 +899,12 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
   int mi_row, mi_col;
 
   for (mi_row = pc->cur_tile_mi_row_start;
-       mi_row < pc->cur_tile_mi_row_end; mi_row += (4 << CONFIG_SB8X8)) {
+       mi_row < pc->cur_tile_mi_row_end; mi_row += 8) {
     // For a SB there are 2 left contexts, each pertaining to a MB row within the SB
     vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
     vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context));
     for (mi_col = pc->cur_tile_mi_col_start;
-         mi_col < pc->cur_tile_mi_col_end; mi_col += (4 << CONFIG_SB8X8)) {
+         mi_col < pc->cur_tile_mi_col_end; mi_col += 8) {
       decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_SIZE_SB64X64);
     }
   }
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index b6dd984a301a29a2476f2eaa640ff43c07e15567..acc3f78f2cbf6feac808286deaab54e60a09dbfa 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -281,12 +281,6 @@ static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + m);
 }
 
-#if !CONFIG_SB8X8
-static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) {
-  write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m);
-}
-#endif
-
 static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m);
 }
@@ -304,12 +298,6 @@ static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m);
 }
 
-#if !CONFIG_SB8X8
-static void write_split(vp9_writer *bc, int x, const vp9_prob *p) {
-  write_token(bc, vp9_mbsplit_tree, p, vp9_mbsplit_encodings + x);
-}
-#endif
-
 static int prob_update_savings(const unsigned int *ct,
                                const vp9_prob oldp, const vp9_prob newp,
                                const vp9_prob upd) {
@@ -671,11 +659,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
     active_section = 6;
 #endif
 
-#if CONFIG_SB8X8
     if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
-#else
-    if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16)
-#endif
       write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
     else
       write_ymode(bc, mode, pc->fc.ymode_prob);
@@ -685,24 +669,10 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
       do {
         write_bmode(bc, m->bmi[j].as_mode.first,
                     pc->fc.bmode_prob);
-      } while (++j < (16 >> (CONFIG_SB8X8 * 2)));
-    }
-#if !CONFIG_SB8X8
-    if (mode == I8X8_PRED) {
-      write_i8x8_mode(bc, m->bmi[0].as_mode.first,
-                      pc->fc.i8x8_mode_prob);
-      write_i8x8_mode(bc, m->bmi[2].as_mode.first,
-                      pc->fc.i8x8_mode_prob);
-      write_i8x8_mode(bc, m->bmi[8].as_mode.first,
-                      pc->fc.i8x8_mode_prob);
-      write_i8x8_mode(bc, m->bmi[10].as_mode.first,
-                      pc->fc.i8x8_mode_prob);
-    } else
-#endif
-    {
-      write_uv_mode(bc, mi->uv_mode,
-                    pc->fc.uv_mode_prob[mode]);
+      } while (++j < 4);
     }
+    write_uv_mode(bc, mi->uv_mode,
+                  pc->fc.uv_mode_prob[mode]);
   } else {
     vp9_prob mv_ref_p[VP9_MVREFS - 1];
 
@@ -714,11 +684,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
 
     // If segment skip is not enabled code the mode.
     if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
-#if CONFIG_SB8X8
       if (mi->sb_type > BLOCK_SIZE_SB8X8) {
-#else
-      if (mi->sb_type > BLOCK_SIZE_MB16X16) {
-#endif
         write_sb_mv_ref(bc, mode, mv_ref_p);
       } else {
         write_mv_ref(bc, mode, mv_ref_p);
@@ -788,34 +754,16 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
         ++count_mb_seg[mi->partitioning];
 #endif
 
-#if !CONFIG_SB8X8
-        write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob);
-        cpi->mbsplit_count[mi->partitioning]++;
-#endif
-
         do {
           B_PREDICTION_MODE blockmode;
           int_mv blockmv;
-#if !CONFIG_SB8X8
-          const int *const  L = vp9_mbsplits[mi->partitioning];
-#endif
           int k = -1;  /* first block in subset j */
           int mv_contz;
           int_mv leftmv, abovemv;
 
           blockmode = cpi->mb.partition_info->bmi[j].mode;
           blockmv = cpi->mb.partition_info->bmi[j].mv;
-#if CONFIG_SB8X8
           k = j;
-#else
-#if CONFIG_DEBUG
-          while (j != L[++k])
-            if (k >= 16)
-              assert(0);
-#else
-          while (j != L[++k]);
-#endif
-#endif
           leftmv.as_int = left_block_mv(xd, m, k);
           abovemv.as_int = above_block_mv(m, k, mis);
           mv_contz = vp9_mv_cont(&leftmv, &abovemv);
@@ -847,7 +795,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
     }
   }
 
-#if CONFIG_SB8X8
   if (((rf == INTRA_FRAME && mode != I4X4_PRED) ||
        (rf != INTRA_FRAME && mode != SPLITMV)) &&
       pc->txfm_mode == TX_MODE_SELECT &&
@@ -862,23 +809,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
         vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
     }
   }
-#else
-  if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
-       (rf != INTRA_FRAME && !(mode == SPLITMV &&
-                               mi->partitioning == PARTITIONING_4X4))) &&
-      pc->txfm_mode == TX_MODE_SELECT &&
-          !(skip_coeff || vp9_segfeature_active(xd, segment_id,
-                                                SEG_LVL_SKIP))) {
-    TX_SIZE sz = mi->txfm_size;
-    // FIXME(rbultje) code ternary symbol once all experiments are merged
-    vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
-    if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) {
-      vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
-      if (mi->sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8)
-        vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
-    }
-  }
-#endif
 }
 
 static void write_mb_modes_kf(const VP9_COMP *cpi,
@@ -901,11 +831,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
     vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP));
   }
 
-#if CONFIG_SB8X8
   if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
-#else
-  if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16)
-#endif
     sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
   else
     kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]);
@@ -915,7 +841,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
     do {
       const B_PREDICTION_MODE a = above_block_mode(m, i, mis);
       const B_PREDICTION_MODE l = (xd->left_available ||
-                                  (i & (3 >> CONFIG_SB8X8))) ?
+                                  (i & 1)) ?
                                   left_block_mode(m, i) : B_DC_PRED;
       const int bm = m->bmi[i].as_mode.first;
 
@@ -923,23 +849,11 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
       ++intra_mode_stats [A] [L] [bm];
 #endif
       write_kf_bmode(bc, bm, c->kf_bmode_prob[a][l]);
-    } while (++i < (16 >> (CONFIG_SB8X8 * 2)));
+    } while (++i < 4);
   }
-#if !CONFIG_SB8X8
-  if (ym == I8X8_PRED) {
-    write_i8x8_mode(bc, m->bmi[0].as_mode.first, c->fc.i8x8_mode_prob);
-    // printf("    mode: %d\n", m->bmi[0].as_mode.first); fflush(stdout);
-    write_i8x8_mode(bc, m->bmi[2].as_mode.first, c->fc.i8x8_mode_prob);
-    // printf("    mode: %d\n", m->bmi[2].as_mode.first); fflush(stdout);
-    write_i8x8_mode(bc, m->bmi[8].as_mode.first, c->fc.i8x8_mode_prob);
-    // printf("    mode: %d\n", m->bmi[8].as_mode.first); fflush(stdout);
-    write_i8x8_mode(bc, m->bmi[10].as_mode.first, c->fc.i8x8_mode_prob);
-    // printf("    mode: %d\n", m->bmi[10].as_mode.first); fflush(stdout);
-  } else
-#endif
-    write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
 
-#if CONFIG_SB8X8
+  write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
+
   if (ym != I4X4_PRED && c->txfm_mode == TX_MODE_SELECT &&
       !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
     TX_SIZE sz = m->mbmi.txfm_size;
@@ -951,19 +865,6 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
         vp9_write(bc, sz != TX_16X16, c->prob_tx[2]);
     }
   }
-#else
-  if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
-      !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
-    TX_SIZE sz = m->mbmi.txfm_size;
-    // FIXME(rbultje) code ternary symbol once all experiments are merged
-    vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
-    if (sz != TX_4X4 && ym <= TM_PRED) {
-      vp9_write(bc, sz != TX_8X8, c->prob_tx[1]);
-      if (m->mbmi.sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8)
-        vp9_write(bc, sz != TX_16X16, c->prob_tx[2]);
-    }
-  }
-#endif
 }
 
 
@@ -1171,15 +1072,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
   else
     assert(0);
 
-#if CONFIG_SB8X8
   if (bsize > BLOCK_SIZE_SB8X8) {
-#else
-  if (bsize > BLOCK_SIZE_MB16X16) {
-#endif
     int pl;
     xd->left_seg_context =
-        cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
-    xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8);
+        cm->left_seg_context + ((mi_row >> 1) & 3);
+    xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
     pl = partition_plane_context(xd, bsize);
     // encode the partition information
     write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl],
@@ -1214,15 +1111,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
   }
 
   // update partition context
-#if CONFIG_SB8X8
   if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
-#else
-  if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32))
-#endif
     return;
 
-  xd->left_seg_context = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8);
+  xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
+  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
   update_partition_context(xd, subsize, bsize);
 }
 
@@ -1239,12 +1132,12 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
 
   for (mi_row = c->cur_tile_mi_row_start;
        mi_row < c->cur_tile_mi_row_end;
-       mi_row += (4 << CONFIG_SB8X8), m_ptr += (4 << CONFIG_SB8X8) * mis) {
+       mi_row += 8, m_ptr += 8 * mis) {
     m = m_ptr;
     vpx_memset(c->left_seg_context, 0, sizeof(c->left_seg_context));
     for (mi_col = c->cur_tile_mi_col_start;
          mi_col < c->cur_tile_mi_col_end;
-         mi_col += (4 << CONFIG_SB8X8), m += (4 << CONFIG_SB8X8))
+         mi_col += 8, m += 8)
       write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col,
                      BLOCK_SIZE_SB64X64);
   }
@@ -2152,19 +2045,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
   vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
   vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob);
   vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
-#if !CONFIG_SB8X8
-  vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
-  vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
-#endif
   vp9_copy(cpi->common.fc.pre_partition_prob, cpi->common.fc.partition_prob);
   cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
 #if CONFIG_COMP_INTERINTRA_PRED
   cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob;
 #endif
   vp9_zero(cpi->sub_mv_ref_count);
-#if !CONFIG_SB8X8
-  vp9_zero(cpi->mbsplit_count);
-#endif
   vp9_zero(cpi->common.fc.mv_ref_ct);
 
   update_coef_probs(cpi, &header_bc);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index e6c24f05aca710d7ad36ac01abd114f70d2a2430..6bc42c7ff4cfd74d8a18fe48518388c3511a2ad4 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -29,7 +29,7 @@ typedef struct {
     B_PREDICTION_MODE mode;
     int_mv mv;
     int_mv second_mv;
-  } bmi[16 >> (2 * CONFIG_SB8X8)];
+  } bmi[4];
 } PARTITION_INFO;
 
 // Structure to hold snapshot of coding context during the mode picking process
@@ -117,9 +117,6 @@ struct macroblock {
   int mbmode_cost[2][MB_MODE_COUNT];
   int intra_uv_mode_cost[2][MB_MODE_COUNT];
   int bmode_costs[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES];
-#if !CONFIG_SB8X8
-  int i8x8_mode_costs[MB_MODE_COUNT];
-#endif
   int inter_bmode_costs[B_MODE_COUNT];
   int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1]
                              [VP9_SWITCHABLE_FILTERS];
@@ -143,11 +140,9 @@ struct macroblock {
 
   // Structure to hold context for each of the 4 MBs within a SB:
   // when encoded as 4 independent MBs:
-#if CONFIG_SB8X8
   PICK_MODE_CONTEXT sb8_context[4][4][4];
   PICK_MODE_CONTEXT sb8x16_context[4][4][2];
   PICK_MODE_CONTEXT sb16x8_context[4][4][2];
-#endif
   PICK_MODE_CONTEXT mb_context[4][4];
   PICK_MODE_CONTEXT sb32x16_context[4][2];
   PICK_MODE_CONTEXT sb16x32_context[4][2];
@@ -164,12 +159,6 @@ struct macroblock {
   void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
   void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
                          int y_blocks);
-#if !CONFIG_SB8X8
-  void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2,
-                              int y_blocks);
-  void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
-                         int y_blocks);
-#endif
 };
 
 #endif  // VP9_ENCODER_VP9_BLOCK_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f8665974bddfed2b051f45b40b749524aae247a2..52ce1618e271eab06de7792126a9298487770a47 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -47,11 +47,6 @@ int enc_debug = 0;
 
 void vp9_select_interp_filter_type(VP9_COMP *cpi);
 
-#if !CONFIG_SB8X8
-static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
-                              int output_enabled, int mi_row, int mi_col);
-#endif
-
 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
                               int output_enabled, int mi_row, int mi_col,
                               BLOCK_SIZE_TYPE bsize);
@@ -392,9 +387,9 @@ static void update_state(VP9_COMP *cpi,
                sizeof(PARTITION_INFO));
 
     mbmi->mv[0].as_int =
-        x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].mv.as_int;
+        x->partition_info->bmi[3].mv.as_int;
     mbmi->mv[1].as_int =
-        x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].second_mv.as_int;
+        x->partition_info->bmi[3].second_mv.as_int;
   }
 
   x->skip = ctx->skip;
@@ -448,9 +443,6 @@ static void update_state(VP9_COMP *cpi,
       THR_D27_PRED /*D27_PRED*/,
       THR_D63_PRED /*D63_PRED*/,
       THR_TM /*TM_PRED*/,
-#if !CONFIG_SB8X8
-      THR_I8X8_PRED /*I8X8_PRED*/,
-#endif
       THR_B_PRED /*I4X4_PRED*/,
     };
     cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++;
@@ -489,14 +481,13 @@ static void update_state(VP9_COMP *cpi,
       mbmi->best_second_mv.as_int = best_second_mv.as_int;
       vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
     }
-#if CONFIG_SB8X8
+
     if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) {
       int i, j;
       for (j = 0; j < bh; ++j)
         for (i = 0; i < bw; ++i)
           xd->mode_info_context[mis * j + i].mbmi = *mbmi;
     }
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
     if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV &&
         mbmi->second_ref_frame <= INTRA_FRAME) {
@@ -567,8 +558,8 @@ static INLINE void set_partition_seg_context(VP9_COMP *cpi,
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
-  xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8);
-  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
+  xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
+  xd->left_seg_context  = cm->left_seg_context + ((mi_row >> 1) & 3);
 }
 
 static void set_offsets(VP9_COMP *cpi,
@@ -580,17 +571,17 @@ static void set_offsets(VP9_COMP *cpi,
   const int dst_fb_idx = cm->new_fb_idx;
   const int idx_str = xd->mode_info_stride * mi_row + mi_col;
   const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
-  const int mb_row = mi_row >> CONFIG_SB8X8;
-  const int mb_col = mi_col >> CONFIG_SB8X8;
+  const int mb_row = mi_row >> 1;
+  const int mb_col = mi_col >> 1;
   const int idx_map = mb_row * cm->mb_cols + mb_col;
   int i;
 
   // entropy context structures
   for (i = 0; i < MAX_MB_PLANE; i++) {
     xd->plane[i].above_context = cm->above_context[i] +
-        (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[i].subsampling_x));
+        (mi_col * 2 >> xd->plane[i].subsampling_x);
     xd->plane[i].left_context = cm->left_context[i] +
-        (((mi_row * 4 >> CONFIG_SB8X8) & 15) >> xd->plane[i].subsampling_y);
+        (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
   }
 
   // partition contexts
@@ -653,9 +644,9 @@ static void set_offsets(VP9_COMP *cpi,
       const int p16 = ((mb_row & 1) << 1) +  (mb_col & 1);
       const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
       const int tile_progress =
-          cm->cur_tile_mi_col_start * cm->mb_rows >> CONFIG_SB8X8;
+          cm->cur_tile_mi_col_start * cm->mb_rows >> 1;
       const int mb_cols =
-          (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start) >> CONFIG_SB8X8;
+          (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start) >> 1;
 
       cpi->seg0_progress =
           ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) << 16) / cm->MBs;
@@ -665,49 +656,6 @@ static void set_offsets(VP9_COMP *cpi,
   }
 }
 
-#if !CONFIG_SB8X8
-static int pick_mb_mode(VP9_COMP *cpi,
-                        int mi_row,
-                        int mi_col,
-                        TOKENEXTRA **tp,
-                        int *totalrate,
-                        int *totaldist) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int splitmodes_used = 0;
-  MB_MODE_INFO *mbmi;
-
-  set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_MB16X16);
-
-  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-    vp9_activity_masking(cpi, x);
-
-  mbmi = &xd->mode_info_context->mbmi;
-  mbmi->sb_type = BLOCK_SIZE_MB16X16;
-
-  // Find best coding mode & reconstruct the MB so it is available
-  // as a predictor for MBs that follow in the SB
-  if (cm->frame_type == KEY_FRAME) {
-    vp9_rd_pick_intra_mode(cpi, x, totalrate, totaldist);
-
-    // Save the coding context
-    vpx_memcpy(&x->mb_context[xd->sb_index][xd->mb_index].mic,
-               xd->mode_info_context, sizeof(MODE_INFO));
-  } else {
-    vp9_pick_mode_inter_macroblock(cpi, x, mi_row, mi_col,
-                                   totalrate, totaldist);
-    splitmodes_used += (mbmi->mode == SPLITMV);
-
-    if (cpi->mb.e_mbd.segmentation_enabled && mbmi->segment_id == 0) {
-      cpi->seg0_idx++;
-    }
-  }
-
-  return splitmodes_used;
-}
-#endif
-
 static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
                           TOKENEXTRA **tp, int *totalrate, int *totaldist,
                           BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
@@ -790,15 +738,10 @@ static void set_block_index(MACROBLOCKD *xd, int idx,
                             BLOCK_SIZE_TYPE bsize) {
   if (bsize >= BLOCK_SIZE_SB32X32) {
     xd->sb_index = idx;
-#if CONFIG_SB8X8
   } else if (bsize >= BLOCK_SIZE_MB16X16) {
     xd->mb_index = idx;
   } else {
     xd->b_index = idx;
-#else
-  } else {
-    xd->mb_index = idx;
-#endif
   }
 }
 
@@ -821,14 +764,12 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
       return &x->sb16x32_context[xd->sb_index][xd->mb_index];
     case BLOCK_SIZE_MB16X16:
       return &x->mb_context[xd->sb_index][xd->mb_index];
-#if CONFIG_SB8X8
     case BLOCK_SIZE_SB16X8:
       return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
     case BLOCK_SIZE_SB8X16:
       return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
     case BLOCK_SIZE_SB8X8:
       return &x->sb8_context[xd->sb_index][xd->mb_index][xd->b_index];
-#endif
     default:
       assert(0);
       return NULL;
@@ -849,17 +790,7 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
     set_block_index(xd, sub_index, bsize);
   set_offsets(cpi, mi_row, mi_col, bsize);
   update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
-#if !CONFIG_SB8X8
-  if (bsize == BLOCK_SIZE_MB16X16) {
-    if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-      vp9_activity_masking(cpi, x);
-
-    encode_macroblock(cpi, tp, output_enabled, mi_row, mi_col);
-  } else
-#endif
-  {
-    encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
-  }
+  encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
 
   if (output_enabled) {
     update_stats(cpi, mi_row, mi_col);
@@ -872,10 +803,8 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
 static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
                       int mi_row, int mi_col, int output_enabled,
                       BLOCK_SIZE_TYPE level,
-                      BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4]
-#if CONFIG_SB8X8
-                      , BLOCK_SIZE_TYPE c3[4][4]
-#endif
+                      BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4],
+                      BLOCK_SIZE_TYPE c3[4][4]
                       ) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
@@ -887,23 +816,13 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-#if CONFIG_SB8X8
   if (level > BLOCK_SIZE_SB8X8) {
-#endif
     set_partition_seg_context(cpi, mi_row, mi_col);
     pl = partition_plane_context(xd, level);
-#if CONFIG_SB8X8
   }
-#endif
 
   if (bsl == bwl && bsl == bhl) {
-    if (output_enabled &&
-#if CONFIG_SB8X8
-        level > BLOCK_SIZE_SB8X8
-#else
-        level > BLOCK_SIZE_MB16X16
-#endif
-        )
+    if (output_enabled && level > BLOCK_SIZE_SB8X8)
       cpi->partition_count[pl][PARTITION_NONE]++;
     encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
   } else if (bsl == bhl && bsl > bwl) {
@@ -923,17 +842,11 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
     assert(bwl < bsl && bhl < bsl);
     if (level == BLOCK_SIZE_SB64X64) {
       subsize = BLOCK_SIZE_SB32X32;
-#if CONFIG_SB8X8
     } else if (level == BLOCK_SIZE_SB32X32) {
       subsize = BLOCK_SIZE_MB16X16;
     } else {
       assert(level == BLOCK_SIZE_MB16X16);
       subsize = BLOCK_SIZE_SB8X8;
-#else
-    } else {
-      assert(level == BLOCK_SIZE_SB32X32);
-      subsize = BLOCK_SIZE_MB16X16;
-#endif
     }
 
     if (output_enabled)
@@ -945,22 +858,12 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
       set_block_index(xd, i, subsize);
       encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
                 output_enabled, subsize,
-#if CONFIG_SB8X8
                 c2 ? c2[i] : c1, c3 ? c3[i] : NULL, NULL);
-#else
-                c2 ? c2[i] : c1, NULL);
-#endif
     }
   }
 
-#if CONFIG_SB8X8
   if (level > BLOCK_SIZE_SB8X8 &&
-      (level == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl))
-#else
-  if (level > BLOCK_SIZE_MB16X16 &&
-      (level == BLOCK_SIZE_SB32X32 || bsl == bwl || bsl == bhl))
-#endif
-  {
+      (level == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) {
     set_partition_seg_context(cpi, mi_row, mi_col);
     update_partition_context(xd, c1, level);
   }
@@ -981,11 +884,9 @@ static void encode_sb_row(VP9_COMP *cpi,
 
   // Code each SB in the row
   for (mi_col = cm->cur_tile_mi_col_start;
-       mi_col < cm->cur_tile_mi_col_end; mi_col += (4 << CONFIG_SB8X8)) {
+       mi_col < cm->cur_tile_mi_col_end; mi_col += 8) {
     int i, p;
-#if CONFIG_SB8X8
     BLOCK_SIZE_TYPE mb_partitioning[4][4];
-#endif
     BLOCK_SIZE_TYPE sb_partitioning[4];
     BLOCK_SIZE_TYPE sb64_partitioning = BLOCK_SIZE_SB32X32;
     int sb64_rate = 0, sb64_dist = 0;
@@ -996,20 +897,20 @@ static void encode_sb_row(VP9_COMP *cpi,
 
     for (p = 0; p < MAX_MB_PLANE; p++) {
       memcpy(a + 16 * p, cm->above_context[p] +
-                 (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+                 (mi_col * 2 >> xd->plane[p].subsampling_x),
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
       memcpy(l + 16 * p, cm->left_context[p],
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
     }
-    memcpy(&seg_a, cm->above_seg_context + (mi_col >> CONFIG_SB8X8),
+    memcpy(&seg_a, cm->above_seg_context + (mi_col >> 1),
            sizeof(seg_a));
     memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
 
     // FIXME(rbultje): this function should probably be rewritten to be
     // recursive at some point in the future.
     for (i = 0; i < 4; i++) {
-      const int x_idx = (i & 1) << (1 + CONFIG_SB8X8);
-      const int y_idx = (i & 2) << CONFIG_SB8X8;
+      const int x_idx = (i & 1) << 2;
+      const int y_idx = (i & 2) << 1;
       int sb32_rate = 0, sb32_dist = 0;
       int splitmodes_used = 0;
       int sb32_skip = 0;
@@ -1026,27 +927,23 @@ static void encode_sb_row(VP9_COMP *cpi,
       for (p = 0; p < MAX_MB_PLANE; p++) {
         vpx_memcpy(l2 + 8 * p,
                    cm->left_context[p] +
-                       (y_idx * 4 >> (CONFIG_SB8X8 +
-                                      xd->plane[p].subsampling_y)),
+                       (y_idx * 2 >> xd->plane[p].subsampling_y),
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
         vpx_memcpy(a2 + 8 * p,
                    cm->above_context[p] +
-                       ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
-                                                 xd->plane[p].subsampling_x)),
+                       ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
       }
 
       /* Encode MBs in raster order within the SB */
       for (j = 0; j < 4; j++) {
-        const int x_idx_m = x_idx + ((j & 1) << CONFIG_SB8X8);
-        const int y_idx_m = y_idx + ((j >> 1) << CONFIG_SB8X8);
+        const int x_idx_m = x_idx + ((j & 1) << 1);
+        const int y_idx_m = y_idx + ((j >> 1) << 1);
         int r, d;
-#if CONFIG_SB8X8
         int r2, d2, mb16_rate = 0, mb16_dist = 0, k;
         ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE];
 
         mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
-#endif
 
         if (mi_row + y_idx_m >= cm->mi_rows ||
             mi_col + x_idx_m >= cm->mi_cols) {
@@ -1057,17 +954,14 @@ static void encode_sb_row(VP9_COMP *cpi,
         // Index of the MB in the SB 0..3
         xd->mb_index = j;
 
-#if CONFIG_SB8X8
         for (p = 0; p < MAX_MB_PLANE; p++) {
           vpx_memcpy(l3 + 4 * p,
                      cm->left_context[p] +
-                         (y_idx_m * 4 >> (CONFIG_SB8X8 +
-                                          xd->plane[p].subsampling_y)),
+                         (y_idx_m * 2 >> xd->plane[p].subsampling_y),
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
           vpx_memcpy(a3 + 4 * p,
                      cm->above_context[p] +
-                         ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 +
-                                                   xd->plane[p].subsampling_x)),
+                         ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
 
@@ -1095,13 +989,11 @@ static void encode_sb_row(VP9_COMP *cpi,
         mb16_rate += x->partition_cost[pl][PARTITION_SPLIT];
         for (p = 0; p < MAX_MB_PLANE; p++) {
           vpx_memcpy(cm->left_context[p] +
-                         (y_idx_m * 4 >> (CONFIG_SB8X8 +
-                                          xd->plane[p].subsampling_y)),
+                         (y_idx_m * 2 >> xd->plane[p].subsampling_y),
                      l3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
           vpx_memcpy(cm->above_context[p] +
-                         ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 +
-                                                   xd->plane[p].subsampling_x)),
+                         ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
                      a3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
@@ -1140,13 +1032,11 @@ static void encode_sb_row(VP9_COMP *cpi,
         }
         for (p = 0; p < MAX_MB_PLANE; p++) {
           vpx_memcpy(cm->left_context[p] +
-                         (y_idx_m * 4 >> (CONFIG_SB8X8 +
-                                          xd->plane[p].subsampling_y)),
+                         (y_idx_m * 2 >> xd->plane[p].subsampling_y),
                      l3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
           vpx_memcpy(cm->above_context[p] +
-                         ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 +
-                                                   xd->plane[p].subsampling_x)),
+                         ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
                      a3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
@@ -1185,13 +1075,11 @@ static void encode_sb_row(VP9_COMP *cpi,
         }
         for (p = 0; p < MAX_MB_PLANE; p++) {
           vpx_memcpy(cm->left_context[p] +
-                         (y_idx_m * 4 >> (CONFIG_SB8X8 +
-                                          xd->plane[p].subsampling_y)),
+                         (y_idx_m * 2 >> xd->plane[p].subsampling_y),
                      l3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
           vpx_memcpy(cm->above_context[p] +
-                         ((mi_col + x_idx_m) * 4 >> (CONFIG_SB8X8 +
-                                                   xd->plane[p].subsampling_x)),
+                         ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
                      a3 + 4 * p,
                      sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
         }
@@ -1211,33 +1099,20 @@ static void encode_sb_row(VP9_COMP *cpi,
         }
         sb32_rate += mb16_rate;
         sb32_dist += mb16_dist;
-#else
-        splitmodes_used += pick_mb_mode(cpi, mi_row + y_idx_m,
-                                        mi_col + x_idx_m, tp, &r, &d);
-        sb32_rate += r;
-        sb32_dist += d;
-#endif
 
         // Dummy encode, do not do the tokenization
-#if CONFIG_SB8X8
         encode_sb(cpi, tp, mi_row + y_idx_m, mi_col + x_idx_m, 0,
                   BLOCK_SIZE_MB16X16, mb_partitioning[i][j], NULL, NULL);
-#else
-        encode_macroblock(cpi, tp, 0, mi_row + y_idx_m,
-                          mi_col + x_idx_m);
-#endif
       }
 
       /* Restore L & A coding context to those in place on entry */
       for (p = 0; p < MAX_MB_PLANE; p++) {
         vpx_memcpy(cm->left_context[p] +
-                       (y_idx * 4 >> (CONFIG_SB8X8 +
-                                      xd->plane[p].subsampling_y)),
+                       (y_idx * 2 >> xd->plane[p].subsampling_y),
                    l2 + 8 * p,
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
         vpx_memcpy(cm->above_context[p] +
-                       ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
-                                                 xd->plane[p].subsampling_x)),
+                       ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
                    a2 + 8 * p,
                    sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
       }
@@ -1252,14 +1127,14 @@ static void encode_sb_row(VP9_COMP *cpi,
       }
 
       // check 32x16
-      if (mi_col + x_idx + (2 << CONFIG_SB8X8) <= cm->mi_cols) {
+      if (mi_col + x_idx + 4 <= cm->mi_cols) {
         int r, d;
 
         xd->mb_index = 0;
         pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx,
                       tp, &r, &d, BLOCK_SIZE_SB32X16,
                       &x->sb32x16_context[xd->sb_index][xd->mb_index]);
-        if (mi_row + y_idx + (1 << CONFIG_SB8X8) < cm->mi_rows) {
+        if (mi_row + y_idx + 2 < cm->mi_rows) {
           int r2, d2;
 
           update_state(cpi, &x->sb32x16_context[xd->sb_index][xd->mb_index],
@@ -1268,7 +1143,7 @@ static void encode_sb_row(VP9_COMP *cpi,
                             0, mi_row + y_idx, mi_col + x_idx,
                             BLOCK_SIZE_SB32X16);
           xd->mb_index = 1;
-          pick_sb_modes(cpi, mi_row + y_idx + (1 << CONFIG_SB8X8),
+          pick_sb_modes(cpi, mi_row + y_idx + 2,
                         mi_col + x_idx, tp, &r2, &d2, BLOCK_SIZE_SB32X16,
                         &x->sb32x16_context[xd->sb_index][xd->mb_index]);
           r += r2;
@@ -1289,27 +1164,25 @@ static void encode_sb_row(VP9_COMP *cpi,
 
         for (p = 0; p < MAX_MB_PLANE; p++) {
           vpx_memcpy(cm->left_context[p] +
-                         (y_idx * 4 >> (CONFIG_SB8X8 +
-                                        xd->plane[p].subsampling_y)),
+                         (y_idx * 2 >> xd->plane[p].subsampling_y),
                      l2 + 8 * p,
                      sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
           vpx_memcpy(cm->above_context[p] +
-                         ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
-                                                   xd->plane[p].subsampling_x)),
+                         ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
                      a2 + 8 * p,
                      sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
         }
       }
 
       // check 16x32
-      if (mi_row + y_idx + (2 << CONFIG_SB8X8) <= cm->mi_rows) {
+      if (mi_row + y_idx + 4 <= cm->mi_rows) {
         int r, d;
 
         xd->mb_index = 0;
         pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx,
                       tp, &r, &d, BLOCK_SIZE_SB16X32,
                       &x->sb16x32_context[xd->sb_index][xd->mb_index]);
-        if (mi_col + x_idx + (1 << CONFIG_SB8X8) < cm->mi_cols) {
+        if (mi_col + x_idx + 2 < cm->mi_cols) {
           int r2, d2;
 
           update_state(cpi, &x->sb16x32_context[xd->sb_index][xd->mb_index],
@@ -1319,7 +1192,7 @@ static void encode_sb_row(VP9_COMP *cpi,
                             BLOCK_SIZE_SB16X32);
           xd->mb_index = 1;
           pick_sb_modes(cpi, mi_row + y_idx,
-                        mi_col + x_idx + (1 << CONFIG_SB8X8),
+                        mi_col + x_idx + 2,
                         tp, &r2, &d2, BLOCK_SIZE_SB16X32,
                         &x->sb16x32_context[xd->sb_index][xd->mb_index]);
           r += r2;
@@ -1340,21 +1213,19 @@ static void encode_sb_row(VP9_COMP *cpi,
 
         for (p = 0; p < MAX_MB_PLANE; p++) {
           vpx_memcpy(cm->left_context[p] +
-                         (y_idx * 4 >> (CONFIG_SB8X8 +
-                                        xd->plane[p].subsampling_y)),
+                         (y_idx * 2 >> xd->plane[p].subsampling_y),
                      l2 + 8 * p,
                      sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
           vpx_memcpy(cm->above_context[p] +
-                         ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
-                                                   xd->plane[p].subsampling_x)),
+                         ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
                      a2 + 8 * p,
                      sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
         }
       }
 
       if (!sb32_skip &&
-          mi_col + x_idx + (2 << CONFIG_SB8X8) <= cm->mi_cols &&
-          mi_row + y_idx + (2 << CONFIG_SB8X8) <= cm->mi_rows) {
+          mi_col + x_idx + 4 <= cm->mi_cols &&
+          mi_row + y_idx + 4 <= cm->mi_rows) {
         int r, d;
 
         /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
@@ -1389,23 +1260,19 @@ static void encode_sb_row(VP9_COMP *cpi,
       // instead of small->big) means we can use as threshold for small, which
       // may enable breakouts if RD is not good enough (i.e. faster)
       encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0,
-#if CONFIG_SB8X8
                 BLOCK_SIZE_SB32X32, sb_partitioning[i], mb_partitioning[i],
                 NULL);
-#else
-                BLOCK_SIZE_SB32X32, sb_partitioning[i], NULL);
-#endif
     }
 
     for (p = 0; p < MAX_MB_PLANE; p++) {
       memcpy(cm->above_context[p] +
-                 (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+                 (mi_col * 2 >> xd->plane[p].subsampling_x),
              a + 16 * p,
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
       memcpy(cm->left_context[p], l + 16 * p,
              sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
     }
-    memcpy(cm->above_seg_context + (mi_col >> CONFIG_SB8X8), &seg_a,
+    memcpy(cm->above_seg_context + (mi_col >> 1), &seg_a,
            sizeof(seg_a));
     memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l));
 
@@ -1414,14 +1281,14 @@ static void encode_sb_row(VP9_COMP *cpi,
     sb64_rate += x->partition_cost[pl][PARTITION_SPLIT];
 
     // check 64x32
-    if (mi_col + (4 << CONFIG_SB8X8) <= cm->mi_cols && !(cm->mb_rows & 1)) {
+    if (mi_col + 8 <= cm->mi_cols && !(cm->mb_rows & 1)) {
       int r, d;
 
       xd->sb_index = 0;
       pick_sb_modes(cpi, mi_row, mi_col,
                     tp, &r, &d, BLOCK_SIZE_SB64X32,
                     &x->sb64x32_context[xd->sb_index]);
-      if (mi_row + (2 << CONFIG_SB8X8) != cm->mi_rows) {
+      if (mi_row + 4 != cm->mi_rows) {
         int r2, d2;
 
         update_state(cpi, &x->sb64x32_context[xd->sb_index],
@@ -1429,7 +1296,7 @@ static void encode_sb_row(VP9_COMP *cpi,
         encode_superblock(cpi, tp,
                           0, mi_row, mi_col, BLOCK_SIZE_SB64X32);
         xd->sb_index = 1;
-        pick_sb_modes(cpi, mi_row + (2 << CONFIG_SB8X8), mi_col,
+        pick_sb_modes(cpi, mi_row + 4, mi_col,
                       tp, &r2, &d2, BLOCK_SIZE_SB64X32,
                       &x->sb64x32_context[xd->sb_index]);
         r += r2;
@@ -1450,7 +1317,7 @@ static void encode_sb_row(VP9_COMP *cpi,
 
       for (p = 0; p < MAX_MB_PLANE; p++) {
         memcpy(cm->above_context[p] +
-                   (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+                   (mi_col * 2 >> xd->plane[p].subsampling_x),
                a + 16 * p,
                sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
         memcpy(cm->left_context[p], l + 16 * p,
@@ -1459,14 +1326,14 @@ static void encode_sb_row(VP9_COMP *cpi,
     }
 
     // check 32x64
-    if (mi_row + (4 << CONFIG_SB8X8) <= cm->mi_rows && !(cm->mb_cols & 1)) {
+    if (mi_row + 8 <= cm->mi_rows && !(cm->mb_cols & 1)) {
       int r, d;
 
       xd->sb_index = 0;
       pick_sb_modes(cpi, mi_row, mi_col,
                     tp, &r, &d, BLOCK_SIZE_SB32X64,
                     &x->sb32x64_context[xd->sb_index]);
-      if (mi_col + (2 << CONFIG_SB8X8) != cm->mi_cols) {
+      if (mi_col + 4 != cm->mi_cols) {
         int r2, d2;
 
         update_state(cpi, &x->sb32x64_context[xd->sb_index],
@@ -1474,7 +1341,7 @@ static void encode_sb_row(VP9_COMP *cpi,
         encode_superblock(cpi, tp,
                           0, mi_row, mi_col, BLOCK_SIZE_SB32X64);
         xd->sb_index = 1;
-        pick_sb_modes(cpi, mi_row, mi_col + (2 << CONFIG_SB8X8),
+        pick_sb_modes(cpi, mi_row, mi_col + 4,
                       tp, &r2, &d2, BLOCK_SIZE_SB32X64,
                       &x->sb32x64_context[xd->sb_index]);
         r += r2;
@@ -1495,7 +1362,7 @@ static void encode_sb_row(VP9_COMP *cpi,
 
       for (p = 0; p < MAX_MB_PLANE; p++) {
         memcpy(cm->above_context[p] +
-                   (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+                   (mi_col * 2 >> xd->plane[p].subsampling_x),
                a + 16 * p,
                sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
         memcpy(cm->left_context[p], l + 16 * p,
@@ -1504,8 +1371,8 @@ static void encode_sb_row(VP9_COMP *cpi,
     }
 
     if (!sb64_skip &&
-        mi_col + (4 << CONFIG_SB8X8) <= cm->mi_cols &&
-        mi_row + (4 << CONFIG_SB8X8) <= cm->mi_rows) {
+        mi_col + 8 <= cm->mi_cols &&
+        mi_row + 8 <= cm->mi_rows) {
       int r, d;
 
       pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d,
@@ -1525,11 +1392,7 @@ static void encode_sb_row(VP9_COMP *cpi,
 
     assert(tp_orig == *tp);
     encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64,
-#if CONFIG_SB8X8
               sb64_partitioning, sb_partitioning, mb_partitioning);
-#else
-              sb64_partitioning, sb_partitioning);
-#endif
     assert(tp_orig < *tp);
   }
 }
@@ -1574,14 +1437,8 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   vp9_zero(cpi->count_mb_ref_frame_usage)
   vp9_zero(cpi->bmode_count)
   vp9_zero(cpi->ymode_count)
-#if !CONFIG_SB8X8
-  vp9_zero(cpi->i8x8_mode_count)
-#endif
   vp9_zero(cpi->y_uv_mode_count)
   vp9_zero(cpi->sub_mv_ref_count)
-#if !CONFIG_SB8X8
-  vp9_zero(cpi->mbsplit_count)
-#endif
   vp9_zero(cpi->common.fc.mv_ref_ct)
   vp9_zero(cpi->sb_ymode_count)
   vp9_zero(cpi->partition_count);
@@ -1719,7 +1576,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
           vp9_get_tile_col_offsets(cm, tile_col);
           for (mi_row = cm->cur_tile_mi_row_start;
                mi_row < cm->cur_tile_mi_row_end;
-               mi_row += (4 << CONFIG_SB8X8)) {
+               mi_row += 8) {
             encode_sb_row(cpi, mi_row, &tp, &totalrate);
           }
           cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
@@ -1848,17 +1705,11 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
     assert(bwl < bsl && bhl < bsl);
     if (bsize == BLOCK_SIZE_SB64X64) {
       subsize = BLOCK_SIZE_SB32X32;
-#if CONFIG_SB8X8
     } else if (bsize == BLOCK_SIZE_SB32X32) {
       subsize = BLOCK_SIZE_MB16X16;
     } else {
       assert(bsize == BLOCK_SIZE_MB16X16);
       subsize = BLOCK_SIZE_SB8X8;
-#else
-    } else {
-      assert(bsize == BLOCK_SIZE_SB32X32);
-      subsize = BLOCK_SIZE_MB16X16;
-#endif
     }
 
     for (n = 0; n < 4; n++) {
@@ -1878,10 +1729,10 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
   MODE_INFO *mi, *mi_ptr = cm->mi;
 
   for (mi_row = 0; mi_row < cm->mi_rows;
-       mi_row += (4 << CONFIG_SB8X8), mi_ptr += (4 << CONFIG_SB8X8) * mis) {
+       mi_row += 8, mi_ptr += 8 * mis) {
     mi = mi_ptr;
     for (mi_col = 0; mi_col < cm->mi_cols;
-         mi_col += (4 << CONFIG_SB8X8), mi += (4 << CONFIG_SB8X8)) {
+         mi_col += 8, mi += 8) {
       reset_skip_txfm_size_sb(cpi, mi, txfm_max,
                               mi_row, mi_col, BLOCK_SIZE_SB64X64);
     }
@@ -2063,41 +1914,16 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
 
     do {
       ++ bct[xd->block[b].bmi.as_mode.first];
-    } while (++b < (16 >> (CONFIG_SB8X8 * 2)));
-  }
-
-#if !CONFIG_SB8X8
-  if (m == I8X8_PRED) {
-    i8x8_modes[xd->block[0].bmi.as_mode.first]++;
-    i8x8_modes[xd->block[2].bmi.as_mode.first]++;
-    i8x8_modes[xd->block[8].bmi.as_mode.first]++;
-    i8x8_modes[xd->block[10].bmi.as_mode.first]++;
+    } while (++b < 4);
   }
 #endif
-#endif
 
-#if CONFIG_SB8X8
-  if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8)
-#else
-  if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_MB16X16)
-#endif
-  {
+  if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8) {
     ++cpi->sb_ymode_count[m];
   } else {
     ++cpi->ymode_count[m];
   }
-#if !CONFIG_SB8X8
-  if (m != I8X8_PRED)
-#endif
     ++cpi->y_uv_mode_count[m][uvm];
-#if !CONFIG_SB8X8
-  else {
-    cpi->i8x8_mode_count[xd->mode_info_context->bmi[0].as_mode.first]++;
-    cpi->i8x8_mode_count[xd->mode_info_context->bmi[2].as_mode.first]++;
-    cpi->i8x8_mode_count[xd->mode_info_context->bmi[8].as_mode.first]++;
-    cpi->i8x8_mode_count[xd->mode_info_context->bmi[10].as_mode.first]++;
-  }
-#endif
   if (m == I4X4_PRED) {
     int b = 0;
     do {
@@ -2106,7 +1932,7 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
       if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
 #endif
       ++cpi->bmode_count[m];
-    } while (++b < (16 >> (CONFIG_SB8X8 * 2)));
+    } while (++b < 4);
   }
 }
 
@@ -2131,257 +1957,6 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
 #endif
 }
 
-#if !CONFIG_SB8X8
-static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
-                              int output_enabled,
-                              int mi_row, int mi_col) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *mi = xd->mode_info_context;
-  MB_MODE_INFO *const mbmi = &mi->mbmi;
-  const int mis = cm->mode_info_stride;
-#if CONFIG_SB8X8
-  int n;
-#endif
-
-  assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_MB16X16);
-
-#ifdef ENC_DEBUG
-  enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
-               mb_row == 8 && mb_col == 0 && output_enabled);
-  if (enc_debug)
-    printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled);
-#endif
-  if (cm->frame_type == KEY_FRAME) {
-    if (cpi->oxcf.tuning == VP8_TUNE_SSIM && output_enabled) {
-      // Adjust the zbin based on this MB rate.
-      adjust_act_zbin(cpi, x);
-      vp9_update_zbin_extra(cpi, x);
-    }
-  } else {
-    vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-
-    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
-      // Adjust the zbin based on this MB rate.
-      adjust_act_zbin(cpi, x);
-    }
-
-    // Experimental code. Special case for gf and arf zeromv modes.
-    // Increase zbin size to suppress noise
-    cpi->zbin_mode_boost = 0;
-    if (cpi->zbin_mode_boost_enabled) {
-      if (mbmi->ref_frame != INTRA_FRAME) {
-        if (mbmi->mode == ZEROMV) {
-          if (mbmi->ref_frame != LAST_FRAME)
-            cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
-          else
-            cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
-        } else if (mbmi->mode == SPLITMV)
-          cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
-        else
-          cpi->zbin_mode_boost = MV_ZBIN_BOOST;
-      } else {
-        cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
-      }
-    }
-
-    vp9_update_zbin_extra(cpi, x);
-  }
-
-  if (mbmi->ref_frame == INTRA_FRAME) {
-#if 0  // def ENC_DEBUG
-    if (enc_debug) {
-      printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip,
-             mbmi->txfm_size);
-    }
-#endif
-    if (mbmi->mode == I4X4_PRED) {
-      vp9_encode_intra16x16mbuv(cm, x);
-      vp9_encode_intra4x4mby(x, BLOCK_SIZE_MB16X16);
-    } else if (mbmi->mode == I8X8_PRED) {
-      vp9_encode_intra8x8mby(x);
-      vp9_encode_intra8x8mbuv(x);
-    } else {
-      vp9_encode_intra16x16mbuv(cm, x);
-      vp9_encode_intra16x16mby(cm, x);
-    }
-
-    if (output_enabled)
-      sum_intra_stats(cpi, x);
-  } else {
-    int ref_fb_idx, second_ref_fb_idx;
-#ifdef ENC_DEBUG
-    if (enc_debug)
-      printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n",
-             mbmi->mode, x->skip, mbmi->txfm_size,
-             mbmi->ref_frame, mbmi->second_ref_frame,
-             mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
-             mbmi->interp_filter);
-#endif
-
-    assert(cm->frame_type != KEY_FRAME);
-
-    if (mbmi->ref_frame == LAST_FRAME)
-      ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
-    else if (mbmi->ref_frame == GOLDEN_FRAME)
-      ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
-    else
-      ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
-
-    if (mbmi->second_ref_frame > 0) {
-      if (mbmi->second_ref_frame == LAST_FRAME)
-        second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
-      else if (mbmi->second_ref_frame == GOLDEN_FRAME)
-        second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
-      else
-        second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
-    }
-
-    setup_pre_planes(xd,
-        &cpi->common.yv12_fb[ref_fb_idx],
-        mbmi->second_ref_frame > 0 ? &cpi->common.yv12_fb[second_ref_fb_idx]
-                                   : NULL,
-        mi_row, mi_col, xd->scale_factor, xd->scale_factor_uv);
-
-    if (!x->skip) {
-      vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16);
-      vp9_encode_sb(cm, x, BLOCK_SIZE_MB16X16);
-    } else {
-      vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16);
-#if CONFIG_COMP_INTERINTRA_PRED
-      if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
-        vp9_build_interintra_predictors(xd,
-                                        xd->plane[0].dst.buf,
-                                        xd->plane[1].dst.buf,
-                                        xd->plane[2].dst.buf,
-                                        xd->plane[0].dst.stride,
-                                        xd->plane[1].dst.stride,
-                                        BLOCK_SIZE_MB16X16);
-      }
-#endif
-    }
-  }
-
-  if (!x->skip) {
-#ifdef ENC_DEBUG
-    if (enc_debug) {
-      int i, j;
-      printf("\n");
-      printf("qcoeff\n");
-      for (i = 0; i < 384; i++) {
-        printf("%3d ", xd->qcoeff[i]);
-        if (i % 16 == 15) printf("\n");
-      }
-      printf("\n");
-      printf("predictor\n");
-      for (i = 0; i < 384; i++) {
-        printf("%3d ", xd->predictor[i]);
-        if (i % 16 == 15) printf("\n");
-      }
-      printf("\n");
-      printf("src_diff\n");
-      for (i = 0; i < 384; i++) {
-        printf("%3d ", x->src_diff[i]);
-        if (i % 16 == 15) printf("\n");
-      }
-      printf("\n");
-      printf("diff\n");
-      for (i = 0; i < 384; i++) {
-        printf("%3d ", xd->block[0].diff[i]);
-        if (i % 16 == 15) printf("\n");
-      }
-      printf("\n");
-      printf("final y\n");
-      for (i = 0; i < 16; i++) {
-        for (j = 0; j < 16; j++)
-          printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]);
-        printf("\n");
-      }
-      printf("\n");
-      printf("final u\n");
-      for (i = 0; i < 8; i++) {
-        for (j = 0; j < 8; j++)
-          printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]);
-        printf("\n");
-      }
-      printf("\n");
-      printf("final v\n");
-      for (i = 0; i < 8; i++) {
-        for (j = 0; j < 8; j++)
-          printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]);
-        printf("\n");
-      }
-      fflush(stdout);
-    }
-#endif
-
-    vp9_tokenize_sb(cpi, xd, t, !output_enabled, BLOCK_SIZE_MB16X16);
-  } else {
-    // FIXME(rbultje): not tile-aware (mi - 1)
-    int mb_skip_context =
-      (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff;
-
-    mbmi->mb_skip_coeff = 1;
-    if (output_enabled)
-      cpi->skip_true_count[mb_skip_context]++;
-    vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
-  }
-
-#if CONFIG_SB8X8
-  // copy skip flag on all mb_mode_info contexts in this SB
-  // if this was a skip at this txfm size
-  for (n = 1; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1;
-    if (mi_col + x_idx < cm->mi_cols && mi_row + y_idx < cm->mi_rows)
-      mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
-  }
-#endif
-
-  if (output_enabled) {
-    int segment_id = mbmi->segment_id;
-    if (cpi->common.txfm_mode == TX_MODE_SELECT &&
-        !(mbmi->mb_skip_coeff ||
-          vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP))) {
-      assert(mbmi->txfm_size <= TX_16X16);
-      if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED &&
-          mbmi->mode != SPLITMV) {
-        cpi->txfm_count_16x16p[mbmi->txfm_size]++;
-      } else if (mbmi->mode == I8X8_PRED ||
-                 (mbmi->mode == SPLITMV &&
-                  mbmi->partitioning != PARTITIONING_4X4)) {
-        cpi->txfm_count_8x8p[mbmi->txfm_size]++;
-      }
-    } else {
-#if CONFIG_SB8X8
-      int y, x;
-#endif
-      if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED &&
-          mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) {
-        mbmi->txfm_size = TX_16X16;
-      } else if (mbmi->mode != I4X4_PRED &&
-                 !(mbmi->mode == SPLITMV &&
-                   mbmi->partitioning == PARTITIONING_4X4) &&
-                 cpi->common.txfm_mode >= ALLOW_8X8) {
-        mbmi->txfm_size = TX_8X8;
-      } else {
-        mbmi->txfm_size = TX_4X4;
-      }
-
-#if CONFIG_SB8X8
-      for (y = 0; y < 2; y++) {
-        for (x = !y; x < 2; x++) {
-          if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) {
-            mi[mis * y + x].mbmi.txfm_size = mbmi->txfm_size;
-          }
-        }
-      }
-#endif
-    }
-  }
-}
-#endif
-
 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
                               int output_enabled, int mi_row, int mi_col,
                               BLOCK_SIZE_TYPE bsize) {
@@ -2431,7 +2006,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
     vp9_update_zbin_extra(cpi, x);
   }
 
-#if CONFIG_SB8X8
   if (xd->mode_info_context->mbmi.mode == I4X4_PRED) {
     assert(bsize == BLOCK_SIZE_SB8X8 &&
            xd->mode_info_context->mbmi.txfm_size == TX_4X4);
@@ -2442,9 +2016,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
 
     if (output_enabled)
       sum_intra_stats(cpi, x);
-  } else
-#endif
-  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+  } else if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
     vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
     vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize);
     if (output_enabled)
@@ -2479,13 +2051,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
   }
 
-#if CONFIG_SB8X8
   if (xd->mode_info_context->mbmi.mode == I4X4_PRED) {
     assert(bsize == BLOCK_SIZE_SB8X8);
     vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize);
-  } else
-#endif
-  if (!x->skip) {
+  } else if (!x->skip) {
     vp9_encode_sb(cm, x, bsize);
     vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize);
   } else {
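Annotation (not part of the patch): with the sb8x8 experiment folded in, every CONFIG_SB8X8 occurrence in these encoder hunks behaves as the constant 1, so the flag-dependent expressions collapse to fixed mode-info strides — a 64x64 superblock is 8 mi units, a 32-pixel half step is 4, a 16x16 macroblock is 2, and each bmi[] entry covers four 4x4 blocks. A minimal standalone check of the substitutions made above and in the later firstpass/rdopt hunks (the macro name is reused here purely for illustration):

#include <assert.h>

/* Illustrative only: the removed experiment flag, treated as always on. */
#define CONFIG_SB8X8 1

int main(void) {
  assert((4 << CONFIG_SB8X8) == 8);         /* 64x64 SB step in mi units          */
  assert((2 << CONFIG_SB8X8) == 4);         /* 32-pixel half-superblock step      */
  assert((1 << CONFIG_SB8X8) == 2);         /* 16x16 MB spans 2 mi units (mb_row << 1) */
  assert((16 >> (CONFIG_SB8X8 * 2)) == 4);  /* 4x4 blocks per bmi[] array         */
  assert((16 >> CONFIG_SB8X8) == 8);        /* residual stride in the 4x4 intra RD path */
  assert((32 >> CONFIG_SB8X8) == 16);       /* fwd_txm4x4 pitch argument          */
  return 0;
}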
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 268058e32c48e59f54b13355ef37edfbca5be279..72a6603f8fab19032a432fbca65728e300c8feee 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -112,151 +112,3 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
   vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
   vp9_encode_sbuv(cm, x, BLOCK_SIZE_MB16X16);
 }
-
-#if !CONFIG_SB8X8
-void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  uint8_t* const src =
-      raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                x->plane[0].src.buf, x->plane[0].src.stride);
-  int16_t* const src_diff =
-      raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                x->plane[0].src_diff);
-  int16_t* const diff =
-      raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                xd->plane[0].diff);
-  uint8_t* const dst =
-      raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                xd->plane[0].dst.buf, xd->plane[0].dst.stride);
-  const int iblock[4] = {0, 1, 4, 5};
-  int i;
-  TX_TYPE tx_type;
-
-  vp9_intra8x8_predict(xd, ib, xd->mode_info_context->bmi[ib].as_mode.first,
-                       dst, xd->plane[0].dst.stride);
-  // generate residual blocks
-  vp9_subtract_block(8, 8, src_diff, 16,
-                     src, x->plane[0].src.stride,
-                     dst, xd->plane[0].dst.stride);
-
-  if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
-    int idx = (ib & 0x02) ? (ib + 2) : ib;
-    int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
-    int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
-
-    assert(idx < 16);
-    tx_type = get_tx_type_8x8(xd, ib);
-    if (tx_type != DCT_DCT) {
-      vp9_short_fht8x8(src_diff, coeff, 16, tx_type);
-      x->quantize_b_8x8(x, idx, tx_type, 16);
-      vp9_short_iht8x8(dqcoeff, diff, 16, tx_type);
-    } else {
-      x->fwd_txm8x8(src_diff, coeff, 32);
-      x->quantize_b_8x8(x, idx, DCT_DCT, 16);
-      vp9_short_idct8x8(dqcoeff, diff, 32);
-    }
-  } else {
-    for (i = 0; i < 4; i++) {
-      int idx = ib + iblock[i];
-      int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
-      int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
-      int16_t* const src_diff =
-          raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, idx,
-                                    x->plane[0].src_diff);
-      int16_t* const diff =
-          raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, idx,
-                                    xd->plane[0].diff);
-
-      assert(idx < 16);
-      tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
-      if (tx_type != DCT_DCT) {
-        vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
-        x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
-        vp9_short_iht4x4(dqcoeff, diff, 16, tx_type);
-      } else if (!(i & 1) &&
-                 get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
-        x->fwd_txm8x4(src_diff, coeff, 32);
-        x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16);
-        vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
-                                    dqcoeff, diff, 32);
-        vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i] + 1],
-                                    dqcoeff + 16, diff + 4, 32);
-        i++;
-      } else {
-        x->fwd_txm4x4(src_diff, coeff, 32);
-        x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
-        vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
-                                    dqcoeff, diff, 32);
-      }
-    }
-  }
-
-  // reconstruct submacroblock
-  for (i = 0; i < 4; i++) {
-    int16_t* const diff =
-        raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib + iblock[i],
-                                  xd->plane[0].diff);
-    uint8_t* const dst =
-        raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib + iblock[i],
-                                  xd->plane[0].dst.buf,
-                                  xd->plane[0].dst.stride);
-    vp9_recon_b_c(dst, diff, 16, dst, xd->plane[0].dst.stride);
-  }
-}
-
-void vp9_encode_intra8x8mby(MACROBLOCK *x) {
-  int i;
-
-  for (i = 0; i < 4; i++)
-    vp9_encode_intra8x8(x, vp9_i8x8_block[i]);
-}
-
-static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
-  MACROBLOCKD * const xd = &x->e_mbd;
-  int16_t * const dqcoeff = MB_SUBBLOCK_FIELD(xd, dqcoeff, ib);
-  int16_t* const coeff = MB_SUBBLOCK_FIELD(x, coeff, ib);
-  const int plane = ib < 20 ? 1 : 2;
-  const int block = ib < 20 ? ib - 16 : ib - 20;
-  uint8_t* const src =
-      raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, plane, block,
-                                x->plane[plane].src.buf,
-                                x->plane[plane].src.stride);
-  int16_t* const src_diff =
-      raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, plane, block,
-                                x->plane[plane].src_diff);
-  int16_t* const diff =
-      raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, plane, block,
-                                xd->plane[plane].diff);
-  uint8_t* const dst =
-      raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, plane, block,
-                                xd->plane[plane].dst.buf,
-                                xd->plane[plane].dst.stride);
-
-  assert(ib >= 16 && ib < 24);
-  vp9_intra_uv4x4_predict(&x->e_mbd, ib, mode,
-                          dst, xd->plane[plane].dst.stride);
-
-  assert(xd->plane[1].subsampling_x == 1);
-  vp9_subtract_block(4, 4, src_diff, 8,
-                     src, x->plane[plane].src.stride,
-                     dst, xd->plane[plane].dst.stride);
-
-  x->fwd_txm4x4(src_diff, coeff, 16);
-  x->quantize_b_4x4(x, ib, DCT_DCT, 16);
-  vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block],
-                              dqcoeff, diff, 16);
-
-  vp9_recon_uv_b_c(dst, diff, dst, xd->plane[plane].dst.stride);
-}
-
-void vp9_encode_intra8x8mbuv(MACROBLOCK *x) {
-  int i;
-
-  for (i = 0; i < 4; i++) {
-    int mode = x->e_mbd.mode_info_context->bmi[vp9_i8x8_block[i]].as_mode.first;
-
-    encode_intra_uv4x4(x, i + 16, mode);  // u
-    encode_intra_uv4x4(x, i + 20, mode);  // v
-  }
-}
-#endif
diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h
index a4f4c184ba8cc49ed20beff47af8809de5863752..c2620049425f35f8d82064c5ea96e3f747554c42 100644
--- a/vp9/encoder/vp9_encodeintra.h
+++ b/vp9/encoder/vp9_encodeintra.h
@@ -17,10 +17,4 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
 void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x);
 void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x);
 void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bs);
-#if !CONFIG_SB8X8
-void vp9_encode_intra8x8mby(MACROBLOCK *x);
-void vp9_encode_intra8x8mbuv(MACROBLOCK *x);
-void vp9_encode_intra8x8(MACROBLOCK *x, int ib);
-#endif
-
 #endif  // VP9_ENCODER_VP9_ENCODEINTRA_H_
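Annotation (not part of the patch): with I8X8_PRED removed, the 8x8 intra helpers deleted from vp9_encodeintra.c/.h have no remaining callers; the only sub-8x8 intra path left is I4X4_PRED, which now operates on a BLOCK_SIZE_SB8X8 partition and visits a 2x2 grid of 4x4 blocks (the `++b < 4` loops in sum_intra_stats above and the `i & 1` / `i >> 1` indexing in the rdopt hunks below). A small sketch of that traversal order, for reference only:

#include <stdio.h>

/* Sketch of the 2x2 walk over the four 4x4 blocks of an 8x8 partition. */
int main(void) {
  int i;
  for (i = 0; i < 4; i++) {
    const int x_idx = i & 1;   /* column inside the 8x8 partition */
    const int y_idx = i >> 1;  /* row inside the 8x8 partition    */
    printf("bmi[%d] -> 4x4 block at (row %d, col %d)\n", i, y_idx, x_idx);
  }
  return 0;
}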
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 6e28f90cde36221ff47ebde1f7ac475ac0007d7f..42105275339aaa9245c2bb34775d3c5103a6387d 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -404,9 +404,6 @@ void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x,
   struct optimize_block_args arg = {cm, x, &ctx};
   vp9_optimize_init(&x->e_mbd, bsize, &ctx);
   foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
-#if !CONFIG_SB8X8
-  0,
-#endif
                                      optimize_block, &arg);
 }
 
@@ -551,9 +548,6 @@ void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x,
   struct encode_b_args arg = {cm, x, NULL};
 
   foreach_transformed_block_in_plane(xd, bsize, 0,
-#if !CONFIG_SB8X8
-                                     0,
-#endif
                                      xform_quant, &arg);
 }
 
@@ -576,9 +570,6 @@ void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x,
     vp9_optimize_init(xd, bsize, &ctx);
 
   foreach_transformed_block_in_plane(xd, bsize, 0,
-#if !CONFIG_SB8X8
-                                     0,
-#endif
                                      encode_block, &arg);
 
   vp9_recon_sby(xd, bsize);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index db5dd6c97988bf6f968309974bec2113e0403b85..e19ff7672e0733c4830ad6e5ae19214ba67cfae9 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -521,9 +521,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
       int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
 
       set_mi_row_col(cm, xd,
-                     mb_row << CONFIG_SB8X8,
+                     mb_row << 1,
                      1 << mi_height_log2(BLOCK_SIZE_MB16X16),
-                     mb_col << CONFIG_SB8X8,
+                     mb_col << 1,
                      1 << mi_height_log2(BLOCK_SIZE_MB16X16));
 
       xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
@@ -626,8 +626,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
           this_error = motion_error;
           vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
           xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-          vp9_build_inter_predictors_sby(xd, mb_row << CONFIG_SB8X8,
-                                         mb_col << CONFIG_SB8X8,
+          vp9_build_inter_predictors_sby(xd, mb_row << 1,
+                                         mb_col << 1,
                                          BLOCK_SIZE_MB16X16);
           vp9_encode_sb(cm, x, BLOCK_SIZE_MB16X16);
           sum_mvr += mv.as_mv.row;
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index af62ec3941032bfbd43c4ca26ef95f4693879f40..689615d4f3d614e9666688dbb64424da9e439aeb 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -386,7 +386,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
       // goes in segment 0
       if (arf_not_zz[offset + mb_col]) {
         ncnt[0]++;
-#if CONFIG_SB8X8
         cpi->segmentation_map[offset * 4 + 2 * mb_col] = 0;
         cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 0;
         cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 0;
@@ -396,11 +395,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
         cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 1;
         cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 1;
         cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols + 1] = 1;
-#else
-        cpi->segmentation_map[offset + mb_col] = 0;
-      } else {
-        cpi->segmentation_map[offset + mb_col] = 1;
-#endif
         ncnt[1]++;
       }
     }
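Annotation (not part of the patch): separate_arf_mbs() now writes the ARF segmentation per 8x8 mode-info unit, so each 16x16 macroblock covers a 2x2 block of map entries; the `offset * 4 + 2 * mb_col` indexing above is equivalent to addressing a map whose row stride is cm->mi_cols, assuming mi_cols == 2 * mb_cols. A hypothetical helper showing that mapping (names invented for illustration, not code from the tree):

/* Mark the four 8x8 mode-info units covered by one 16x16 macroblock in a
 * segmentation map laid out in mi units. Assumes mi_cols == 2 * mb_cols,
 * which makes it equivalent to the offset * 4 + 2 * mb_col indexing above. */
static void set_mb_segment(unsigned char *seg_map, int mi_cols,
                           int mb_row, int mb_col, unsigned char seg_id) {
  unsigned char *p = seg_map + (2 * mb_row) * mi_cols + 2 * mb_col;
  p[0] = seg_id;            /* top-left 8x8 unit     */
  p[1] = seg_id;            /* top-right 8x8 unit    */
  p[mi_cols] = seg_id;      /* bottom-left 8x8 unit  */
  p[mi_cols + 1] = seg_id;  /* bottom-right 8x8 unit */
}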
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index 88cd1f41b94f3818bf0bf1f07dac261d5ebc1d31..e26daf0c936c1f2419efd71d520e57703398cc64 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -41,10 +41,6 @@ void vp9_init_mode_costs(VP9_COMP *c) {
                   x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
   vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
                   x->kf_uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
-#if !CONFIG_SB8X8
-  vp9_cost_tokens(c->mb.i8x8_mode_costs,
-                  x->fc.i8x8_mode_prob, vp9_i8x8_mode_tree);
-#endif
 
   for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i)
     vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 60b7abf336bb07ae385fef7d47c4f7a044230528..ffee34eb7274ccb884f91bcc28ea76af5e582391 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -617,9 +617,6 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
   sf->thresh_mult[THR_D63_PRED ] += speed_multiplier * 1500;
 
   sf->thresh_mult[THR_B_PRED   ] += speed_multiplier * 2500;
-#if !CONFIG_SB8X8
-  sf->thresh_mult[THR_I8X8_PRED] += speed_multiplier * 2500;
-#endif
 
   sf->thresh_mult[THR_NEWMV    ] += speed_multiplier * 1000;
   sf->thresh_mult[THR_NEWG     ] += speed_multiplier * 1000;
@@ -858,10 +855,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   }
 
   cpi->mb.quantize_b_4x4      = vp9_regular_quantize_b_4x4;
-#if !CONFIG_SB8X8
-  cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair;
-  cpi->mb.quantize_b_8x8      = vp9_regular_quantize_b_8x8;
-#endif
 
   vp9_init_quantizer(cpi);
 
@@ -1622,12 +1615,12 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
   BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8,
       NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
 
-#if CONFIG_SB8X8
   BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
       NULL, NULL, NULL, NULL, NULL, NULL)
+
   BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
       NULL, NULL, NULL, NULL, NULL, NULL)
-#endif
+
   BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
       NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
 
@@ -3307,13 +3300,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
     vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count);
     vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count);
-#if !CONFIG_SB8X8
-    vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count);
-#endif
     vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count);
-#if !CONFIG_SB8X8
-    vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count);
-#endif
     vp9_copy(cpi->common.fc.partition_counts, cpi->partition_count);
 #if CONFIG_COMP_INTERINTRA_PRED
     vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index c6abfe13eb6da17f5519d3ae3dfd06d732b8de94..cc91ba5d28bd9ad4e96d4484f37f2c6de5ae58ec 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -48,9 +48,9 @@
 #define KEY_FRAME_CONTEXT 5
 
 #if CONFIG_COMP_INTERINTRA_PRED
-#define MAX_MODES 54 - CONFIG_SB8X8
+#define MAX_MODES 53
 #else
-#define MAX_MODES 42 - CONFIG_SB8X8
+#define MAX_MODES 41
 #endif
 
 #define MIN_THRESHMULT  32
@@ -72,9 +72,6 @@ typedef struct {
   // Stats
   int y_modes[VP9_YMODES];
   int uv_modes[VP9_UV_MODES];
-#if !CONFIG_SB8X8
-  int i8x8_modes[VP9_I8X8_MODES];
-#endif
   int b_modes[B_MODE_COUNT];
   int inter_y_modes[MB_MODE_COUNT];
   int inter_uv_modes[VP9_UV_MODES];
@@ -102,13 +99,7 @@ typedef struct {
   vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
   vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
   vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1];
-#if !CONFIG_SB8X8
-  vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
-#endif
   vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
-#if !CONFIG_SB8X8
-  vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
-#endif
   vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
 
   vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
@@ -213,9 +204,6 @@ typedef enum {
   THR_SPLITA,
 
   THR_B_PRED,
-#if !CONFIG_SB8X8
-  THR_I8X8_PRED,
-#endif
 
   THR_COMP_ZEROLG,
   THR_COMP_NEARESTLG,
@@ -281,19 +269,12 @@ typedef struct {
 } SPEED_FEATURES;
 
 enum BlockSize {
-#if CONFIG_SB8X8
   BLOCK_4X4,
   BLOCK_4X8,
   BLOCK_8X4,
   BLOCK_8X8,
   BLOCK_8X16,
   BLOCK_16X8,
-#else
-  BLOCK_16X8 = PARTITIONING_16X8,
-  BLOCK_8X16 = PARTITIONING_8X16,
-  BLOCK_8X8 = PARTITIONING_8X8,
-  BLOCK_4X4 = PARTITIONING_4X4,
-#endif
   BLOCK_16X16,
   BLOCK_MAX_SEGMENTS,
   BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
@@ -468,13 +449,7 @@ typedef struct VP9_COMP {
   int sb_ymode_count [VP9_I32X32_MODES];
   int ymode_count[VP9_YMODES];        /* intra MB type cts this frame */
   int bmode_count[VP9_NKF_BINTRAMODES];
-#if !CONFIG_SB8X8
-  int i8x8_mode_count[VP9_I8X8_MODES];
-#endif
   int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS];
-#if !CONFIG_SB8X8
-  int mbsplit_count[VP9_NUMMBSPLITS];
-#endif
   int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES];
   unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 #if CONFIG_COMP_INTERINTRA_PRED
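Annotation (not part of the patch): in vp9_onyx_int.h the mode list shrinks by exactly one entry (I8X8_PRED / THR_I8X8_PRED), so the flag-dependent MAX_MODES definitions fold to the constants 53 and 41, and enum BlockSize becomes a plain zero-based list instead of aliasing the removed PARTITIONING_* values. The implied values, spelled out for reference only:

/* enum BlockSize after the patch, with implied values written out. */
enum BlockSize {
  BLOCK_4X4   = 0,
  BLOCK_4X8   = 1,
  BLOCK_8X4   = 2,
  BLOCK_8X8   = 3,
  BLOCK_8X16  = 4,
  BLOCK_16X8  = 5,
  BLOCK_16X16 = 6,
  BLOCK_MAX_SEGMENTS = 7,
  BLOCK_32X32 = BLOCK_MAX_SEGMENTS  /* larger sizes continue from here */
};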
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 6cf99d64c1f4b40585835ff3fe0ff7b0dd6438ac..4ed8f632698c27d2802d239ad65b0f4e79160b7d 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -133,39 +133,6 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
            pt_scan, 1);
 }
 
-#if !CONFIG_SB8X8
-void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
-                                int y_blocks) {
-  MACROBLOCKD *const xd = &mb->e_mbd;
-  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
-  const int *pt_scan = get_scan_8x8(tx_type);
-
-  quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
-           BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
-           64, mb->skip_block,
-           mb->plane[pb_idx.plane].zbin,
-           mb->plane[pb_idx.plane].round,
-           mb->plane[pb_idx.plane].quant,
-           mb->plane[pb_idx.plane].quant_shift,
-           BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
-           BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
-           xd->plane[pb_idx.plane].dequant,
-           mb->plane[pb_idx.plane].zbin_extra,
-           &xd->plane[pb_idx.plane].eobs[pb_idx.block],
-           pt_scan, 1);
-}
-
-/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
- * these two C functions if corresponding optimized routine is not available.
- * NEON optimized version implements currently the fast quantization for pair
- * of blocks. */
-void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2,
-                                     int y_blocks) {
-  vp9_regular_quantize_b_4x4(x, b_idx1, DCT_DCT, y_blocks);
-  vp9_regular_quantize_b_4x4(x, b_idx2, DCT_DCT, y_blocks);
-}
-#endif
-
 static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
   unsigned t;
   int l;
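Annotation (not part of the patch): once the 8x8 intra path is gone, vp9_regular_quantize_b_8x8 and the 4x4 pair variant have no remaining callers, which is why they are deleted here; as the removed body shows, the pair variant was simply two DCT_DCT calls into the surviving 4x4 entry point. A reference sketch of that equivalence, assuming the usual vp9 encoder headers are included:

/* Equivalent of the deleted pair helper, kept only as a reference sketch;
 * it forwards to the surviving 4x4 quantizer twice. */
static void quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2,
                                int y_blocks) {
  vp9_regular_quantize_b_4x4(x, b_idx1, DCT_DCT, y_blocks);
  vp9_regular_quantize_b_4x4(x, b_idx2, DCT_DCT, y_blocks);
}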
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 42d339dfbb71072981e24e1f7891ecae338bdfcc..0f84b1a373f59518284933a15e1c27fb7ecb3ad9 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -138,13 +138,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
   vp9_copy(cc->sb_ymode_prob, cm->fc.sb_ymode_prob);
   vp9_copy(cc->bmode_prob, cm->fc.bmode_prob);
   vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob);
-#if !CONFIG_SB8X8
-  vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob);
-#endif
   vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob);
-#if !CONFIG_SB8X8
-  vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob);
-#endif
   vp9_copy(cc->partition_prob, cm->fc.partition_prob);
 
   // Stats
@@ -202,14 +196,8 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
   vp9_copy(cm->fc.ymode_prob, cc->ymode_prob);
   vp9_copy(cm->fc.sb_ymode_prob, cc->sb_ymode_prob);
   vp9_copy(cm->fc.bmode_prob, cc->bmode_prob);
-#if !CONFIG_SB8X8
-  vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob);
-#endif
   vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob);
   vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob);
-#if !CONFIG_SB8X8
-  vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob);
-#endif
   vp9_copy(cm->fc.partition_prob, cc->partition_prob);
 
   // Stats
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index dcbdef3ba9d7e3f94e455b0b71dbe789df47eb3d..90d56b2d230ff062b4ed919ad124d46e3fef50b5 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -102,9 +102,6 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {SPLITMV,   ALTREF_FRAME, NONE},
 
   {I4X4_PRED,    INTRA_FRAME,  NONE},
-#if !CONFIG_SB8X8
-  {I8X8_PRED, INTRA_FRAME,  NONE},
-#endif
 
   /* compound prediction modes */
   {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
@@ -703,10 +700,7 @@ static void super_block_yrd(VP9_COMP *cpi,
 
   choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                            TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
-#if CONFIG_SB8X8
-                           - (bs < BLOCK_SIZE_MB16X16)
-#endif
-                           );
+                           - (bs < BLOCK_SIZE_MB16X16));
 }
 
 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
@@ -724,39 +718,23 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
   const int src_stride = x->plane[0].src.stride;
   uint8_t* const src =
       raster_block_offset_uint8(xd,
-#if CONFIG_SB8X8
                                 BLOCK_SIZE_SB8X8,
-#else
-                                BLOCK_SIZE_MB16X16,
-#endif
                                 0, ib,
                                 x->plane[0].src.buf, src_stride);
   int16_t* const src_diff =
       raster_block_offset_int16(xd,
-#if CONFIG_SB8X8
                                 BLOCK_SIZE_SB8X8,
-#else
-                                BLOCK_SIZE_MB16X16,
-#endif
                                 0, ib,
                                 x->plane[0].src_diff);
   int16_t* const diff =
       raster_block_offset_int16(xd,
-#if CONFIG_SB8X8
                                 BLOCK_SIZE_SB8X8,
-#else
-                                BLOCK_SIZE_MB16X16,
-#endif
                                 0, ib,
                                 xd->plane[0].diff);
   int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
   uint8_t* const dst =
       raster_block_offset_uint8(xd,
-#if CONFIG_SB8X8
                                 BLOCK_SIZE_SB8X8,
-#else
-                                BLOCK_SIZE_MB16X16,
-#endif
                                 0, ib,
                                 xd->plane[0].dst.buf, xd->plane[0].dst.stride);
   ENTROPY_CONTEXT ta = *a, tempa = *a;
@@ -770,7 +748,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
    * */
   DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
 
-  assert(ib < (16 >> (2 * CONFIG_SB8X8)));
+  assert(ib < 4);
 #if CONFIG_NEWBINTRAMODES
   xd->mode_info_context->bmi[ib].as_mode.context =
     vp9_find_bpred_context(xd, ib, dst, xd->plane[0].dst.stride);
@@ -799,23 +777,19 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
 #endif
 
     vp9_intra4x4_predict(xd, ib,
-#if CONFIG_SB8X8
                          BLOCK_SIZE_SB8X8,
-#else
-                         BLOCK_SIZE_MB16X16,
-#endif
                          mode, dst, xd->plane[0].dst.stride);
-    vp9_subtract_block(4, 4, src_diff, 16 >> CONFIG_SB8X8,
+    vp9_subtract_block(4, 4, src_diff, 8,
                        src, src_stride,
                        dst, xd->plane[0].dst.stride);
 
     xd->mode_info_context->bmi[ib].as_mode.first = mode;
     tx_type = get_tx_type_4x4(xd, ib);
     if (tx_type != DCT_DCT) {
-      vp9_short_fht4x4(src_diff, coeff, 16 >> CONFIG_SB8X8, tx_type);
+      vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
       x->quantize_b_4x4(x, ib, tx_type, 16);
     } else {
-      x->fwd_txm4x4(src_diff, coeff, 32 >> CONFIG_SB8X8);
+      x->fwd_txm4x4(src_diff, coeff, 16);
       x->quantize_b_4x4(x, ib, tx_type, 16);
     }
 
@@ -848,19 +822,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
 
   // inverse transform
   if (best_tx_type != DCT_DCT)
-    vp9_short_iht4x4(best_dqcoeff, diff, 16 >> CONFIG_SB8X8, best_tx_type);
+    vp9_short_iht4x4(best_dqcoeff, diff, 8, best_tx_type);
   else
-    xd->inv_txm4x4(best_dqcoeff, diff, 32 >> CONFIG_SB8X8);
+    xd->inv_txm4x4(best_dqcoeff, diff, 16);
 
   vp9_intra4x4_predict(xd, ib,
-#if CONFIG_SB8X8
                        BLOCK_SIZE_SB8X8,
-#else
-                       BLOCK_SIZE_MB16X16,
-#endif
                        *best_mode,
                        dst, xd->plane[0].dst.stride);
-  vp9_recon_b(dst, diff, 16 >> CONFIG_SB8X8,
+  vp9_recon_b(dst, diff, 8,
               dst, xd->plane[0].dst.stride);
 
   return best_rd;
@@ -875,7 +845,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
   int distortion = 0;
   int tot_rate_y = 0;
   int64_t total_rd = 0;
-  ENTROPY_CONTEXT t_above[4 >> CONFIG_SB8X8], t_left[4 >> CONFIG_SB8X8];
+  ENTROPY_CONTEXT t_above[2], t_left[2];
   int *bmode_costs;
 
   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
@@ -884,8 +854,8 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
   xd->mode_info_context->mbmi.mode = I4X4_PRED;
   bmode_costs = mb->inter_bmode_costs;
 
-  for (i = 0; i < (16 >> (2 * CONFIG_SB8X8)); i++) {
-    const int x_idx = i & (3 >> CONFIG_SB8X8), y_idx = i >> (2 >> CONFIG_SB8X8);
+  for (i = 0; i < 4; i++) {
+    const int x_idx = i & 1, y_idx = i >> 1;
     MODE_INFO *const mic = xd->mode_info_context;
     const int mis = xd->mode_info_stride;
     B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
@@ -893,11 +863,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
 #if CONFIG_NEWBINTRAMODES
     uint8_t* const dst =
         raster_block_offset_uint8(xd,
-#if CONFIG_SB8X8
                                   BLOCK_SIZE_SB8X8,
-#else
-                                  BLOCK_SIZE_MB16X16,
-#endif
                                   0, i,
                                   xd->plane[0].dst.buf,
                                   xd->plane[0].dst.stride);
@@ -995,246 +961,6 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
   return best_rd;
 }
 
-#if !CONFIG_SB8X8
-static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
-                                     B_PREDICTION_MODE *best_mode,
-                                     int *mode_costs,
-                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
-                                     int *bestrate, int *bestratey,
-                                     int *bestdistortion) {
-  VP9_COMMON *const cm = &cpi->common;
-  MB_PREDICTION_MODE mode;
-  MACROBLOCKD *xd = &x->e_mbd;
-  int64_t best_rd = INT64_MAX;
-  int distortion = 0, rate = 0;
-  ENTROPY_CONTEXT ta[2], tl[2], ta_temp[2], tl_temp[2];
-  // perform transformation of dimension 8x8
-  // note the input and output index mapping
-  int idx = (ib & 0x02) ? (ib + 2) : ib;
-  const int src_stride = x->plane[0].src.stride;
-  uint8_t* const src =
-      raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                x->plane[0].src.buf, src_stride);
-  int16_t* const src_diff =
-      raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                x->plane[0].src_diff);
-  int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
-  uint8_t* const dst =
-      raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                xd->plane[0].dst.buf, xd->plane[0].dst.stride);
-
-  assert(ib < 16);
-  vpx_memcpy(ta, a, sizeof(ta));
-  vpx_memcpy(tl, l, sizeof(tl));
-  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
-    int64_t this_rd;
-    int rate_t = 0;
-
-    // FIXME rate for compound mode and second intrapred mode
-    rate = mode_costs[mode];
-    xd->mode_info_context->bmi[ib].as_mode.first = mode;
-
-    vp9_intra8x8_predict(xd, ib, mode, dst, xd->plane[0].dst.stride);
-
-    vp9_subtract_block(8, 8, src_diff, 16,
-                       src, src_stride,
-                       dst, xd->plane[0].dst.stride);
-
-    vpx_memcpy(ta_temp, ta, sizeof(ta));
-    vpx_memcpy(tl_temp, tl, sizeof(tl));
-
-    if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
-      TX_TYPE tx_type = get_tx_type_8x8(xd, ib);
-      if (tx_type != DCT_DCT)
-        vp9_short_fht8x8(src_diff, coeff, 16, tx_type);
-      else
-        x->fwd_txm8x8(src_diff, coeff, 32);
-      x->quantize_b_8x8(x, idx, tx_type, 16);
-
-      // compute quantization mse of 8x8 block
-      distortion = vp9_block_error_c(coeff,
-          BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
-
-      rate_t = cost_coeffs(cm, x, 0, idx, PLANE_TYPE_Y_WITH_DC,
-                           ta_temp, tl_temp, TX_8X8, 16);
-
-      rate += rate_t;
-    } else {
-      static const int iblock[4] = {0, 1, 4, 5};
-      TX_TYPE tx_type;
-      int i;
-
-      distortion = 0;
-      rate_t = 0;
-      for (i = 0; i < 4; ++i) {
-        int16_t* const src_diff =
-            raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16,
-                                      0, ib + iblock[i],
-                                      x->plane[0].src_diff);
-        int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff,
-                                            ib + iblock[i], 16);
-        int do_two = 0;
-        tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
-        if (tx_type != DCT_DCT) {
-          vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
-          x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
-        } else if (!(i & 1) &&
-                   get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
-          x->fwd_txm8x4(src_diff, coeff, 32);
-          x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16);
-          do_two = 1;
-        } else {
-          x->fwd_txm4x4(src_diff, coeff, 32);
-          x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
-        }
-        distortion += vp9_block_error_c(coeff,
-            BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16),
-            16 << do_two);
-        rate_t += cost_coeffs(cm, x, 0, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
-                              &ta_temp[i & 1], &tl_temp[i >> 1],
-                              TX_4X4, 16);
-        if (do_two) {
-          i++;
-          rate_t += cost_coeffs(cm, x, 0, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
-                                &ta_temp[i & 1], &tl_temp[i >> 1],
-                                TX_4X4, 16);
-        }
-      }
-      rate += rate_t;
-    }
-
-    distortion >>= 2;
-    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
-    if (this_rd < best_rd) {
-      *bestrate = rate;
-      *bestratey = rate_t;
-      *bestdistortion = distortion;
-      vpx_memcpy(a, ta_temp, sizeof(ta_temp));
-      vpx_memcpy(l, tl_temp, sizeof(tl_temp));
-      best_rd = this_rd;
-      *best_mode = mode;
-    }
-  }
-  xd->mode_info_context->bmi[ib].as_mode.first = (*best_mode);
-  vp9_encode_intra8x8(x, ib);
-
-  return best_rd;
-}
-
-static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
-                                         int *Rate, int *rate_y,
-                                         int *Distortion, int64_t best_rd) {
-  MACROBLOCKD *const xd = &mb->e_mbd;
-  int i, ib;
-  int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED];
-  int distortion = 0;
-  int tot_rate_y = 0;
-  int64_t total_rd = 0;
-  ENTROPY_CONTEXT t_above[4], t_left[4];
-  int *i8x8mode_costs;
-
-  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
-  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
-
-  xd->mode_info_context->mbmi.mode = I8X8_PRED;
-  i8x8mode_costs  = mb->i8x8_mode_costs;
-
-  for (i = 0; i < 4; i++) {
-    const int x_idx = i & 1, y_idx = i >> 1;
-    MODE_INFO *const mic = xd->mode_info_context;
-    B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
-    int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
-
-    ib = vp9_i8x8_block[i];
-    total_rd += rd_pick_intra8x8block(cpi, mb, ib, &best_mode, i8x8mode_costs,
-                                      t_above + x_idx * 2, t_left + y_idx * 2,
-                                      &r, &ry, &d);
-    cost += r;
-    distortion += d;
-    tot_rate_y += ry;
-    mic->bmi[ib].as_mode.first = best_mode;
-  }
-
-  *Rate = cost;
-  *rate_y = tot_rate_y;
-  *Distortion = distortion;
-  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
-}
-
-static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x,
-                                                  int *rate, int *rate_y,
-                                                  int *distortion,
-                                                  int *mode8x8,
-                                                  int64_t best_yrd,
-                                                  int64_t *txfm_cache) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
-  int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
-  int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
-  int64_t tmp_rd_4x4s, tmp_rd_8x8s;
-  int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
-  int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
-
-  mbmi->txfm_size = TX_4X4;
-  tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
-                                         &d4x4, best_yrd);
-  mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
-  mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
-  mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
-  mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
-  mbmi->txfm_size = TX_8X8;
-  tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
-                                         &d8x8, best_yrd);
-  txfm_cache[ONLY_4X4]  = tmp_rd_4x4;
-  txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
-  txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
-  tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
-  tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
-  txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ?
-                               tmp_rd_4x4s : tmp_rd_8x8s;
-  if (cm->txfm_mode == TX_MODE_SELECT) {
-    if (tmp_rd_4x4s < tmp_rd_8x8s) {
-      *rate = r4x4 + cost0;
-      *rate_y = tok4x4 + cost0;
-      *distortion = d4x4;
-      mbmi->txfm_size = TX_4X4;
-      tmp_rd = tmp_rd_4x4s;
-    } else {
-      *rate = r8x8 + cost1;
-      *rate_y = tok8x8 + cost1;
-      *distortion = d8x8;
-      mbmi->txfm_size = TX_8X8;
-      tmp_rd = tmp_rd_8x8s;
-
-      mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
-      mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
-      mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
-      mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
-    }
-  } else if (cm->txfm_mode == ONLY_4X4) {
-    *rate = r4x4;
-    *rate_y = tok4x4;
-    *distortion = d4x4;
-    mbmi->txfm_size = TX_4X4;
-    tmp_rd = tmp_rd_4x4;
-  } else {
-    *rate = r8x8;
-    *rate_y = tok8x8;
-    *distortion = d8x8;
-    mbmi->txfm_size = TX_8X8;
-    tmp_rd = tmp_rd_8x8;
-
-    mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
-    mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
-    mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
-    mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
-  }
-
-  return tmp_rd;
-}
-#endif  // !CONFIG_SB8X8
-
 static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                       int *rate, int *distortion,
                                       int *skippable, BLOCK_SIZE_TYPE bsize,
@@ -1329,7 +1055,6 @@ void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
   x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
 }
 
-#if CONFIG_SB8X8
 static int labels2mode(MACROBLOCK *x,
                        int const *labelings, int which_label,
                        B_PREDICTION_MODE this_mode,
@@ -1527,7 +1252,6 @@ typedef struct {
   int mvthresh;
   int *mdcounts;
 } BEST_SEG_INFO;
-#endif  // CONFIG_SB8X8
 
 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
   int r = 0;
@@ -1538,7 +1262,6 @@ static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
   return r;
 }
 
-#if CONFIG_SB8X8
 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                     BEST_SEG_INFO *bsi,
                                     int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
@@ -1835,2591 +1558,802 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
   return (int)(bsi.segment_rd);
 }
 
-#else  // !CONFIG_SB8X8
-
-static int labels2mode(
-  MACROBLOCK *x,
-  int const *labelings, int which_label,
-  B_PREDICTION_MODE this_mode,
-  int_mv *this_mv, int_mv *this_second_mv,
-  int_mv seg_mvs[MAX_REF_FRAMES - 1],
-  int_mv *best_ref_mv,
-  int_mv *second_best_ref_mv,
-  int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *const mic = xd->mode_info_context;
-  MB_MODE_INFO * mbmi = &mic->mbmi;
-  const int mis = xd->mode_info_stride;
-
-  int i, cost = 0, thismvcost = 0;
-
-  /* We have to be careful retrieving previously-encoded motion vectors.
-     Ones from this macroblock have to be pulled from the BLOCKD array
-     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
-  for (i = 0; i < 16; ++i) {
-    const int row = i >> 2,  col = i & 3;
-
-    B_PREDICTION_MODE m;
-
-    if (labelings[i] != which_label)
-      continue;
+static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
+                    uint8_t *ref_y_buffer, int ref_y_stride,
+                    int ref_frame, enum BlockSize block_size ) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  int_mv this_mv;
+  int i;
+  int zero_seen = 0;
+  int best_index = 0;
+  int best_sad = INT_MAX;
+  int this_sad = INT_MAX;
 
-    if (col  &&  labelings[i] == labelings[i - 1])
-      m = LEFT4X4;
-    else if (row  &&  labelings[i] == labelings[i - 4])
-      m = ABOVE4X4;
-    else {
-      // the only time we should do costing for new motion vector or mode
-      // is when we are on a new label  (jbb May 08, 2007)
-      switch (m = this_mode) {
-        case NEW4X4 :
-          if (mbmi->second_ref_frame > 0) {
-            this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int;
-            this_second_mv->as_int =
-              seg_mvs[mbmi->second_ref_frame - 1].as_int;
-          }
+  uint8_t *src_y_ptr = x->plane[0].src.buf;
+  uint8_t *ref_y_ptr;
+  int row_offset, col_offset;
 
-          thismvcost  = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
-                                        102, xd->allow_high_precision_mv);
-          if (mbmi->second_ref_frame > 0) {
-            thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
-                                          mvjcost, mvcost, 102,
-                                          xd->allow_high_precision_mv);
-          }
-          break;
-        case LEFT4X4:
-          this_mv->as_int = col ? mic->bmi[i - 1].as_mv[0].as_int :
-                                  left_block_mv(xd, mic, i);
-          if (mbmi->second_ref_frame > 0)
-            this_second_mv->as_int = col ? mic->bmi[i - 1].as_mv[1].as_int :
-                                           left_block_second_mv(xd, mic, i);
-          break;
-        case ABOVE4X4:
-          this_mv->as_int = row ? mic->bmi[i - 4].as_mv[0].as_int :
-                                  above_block_mv(mic, i, mis);
-          if (mbmi->second_ref_frame > 0)
-            this_second_mv->as_int = row ? mic->bmi[i - 4].as_mv[1].as_int :
-                                           above_block_second_mv(mic, i, mis);
-          break;
-        case ZERO4X4:
-          this_mv->as_int = 0;
-          if (mbmi->second_ref_frame > 0)
-            this_second_mv->as_int = 0;
-          break;
-        default:
-          break;
-      }
+  // Get the sad for each candidate reference mv
+  for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
+    this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
 
-      if (m == ABOVE4X4) { // replace above with left if same
-        int_mv left_mv, left_second_mv;
+    // The list is at an end if we see 0 for a second time.
+    if (!this_mv.as_int && zero_seen)
+      break;
+    zero_seen = zero_seen || !this_mv.as_int;
 
-        left_second_mv.as_int = 0;
-        left_mv.as_int = col ? mic->bmi[i - 1].as_mv[0].as_int :
-                         left_block_mv(xd, mic, i);
-        if (mbmi->second_ref_frame > 0)
-          left_second_mv.as_int = col ? mic->bmi[i - 1].as_mv[1].as_int :
-                                  left_block_second_mv(xd, mic, i);
+    row_offset = this_mv.as_mv.row >> 3;
+    col_offset = this_mv.as_mv.col >> 3;
+    ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
 
-        if (left_mv.as_int == this_mv->as_int &&
-            (mbmi->second_ref_frame <= 0 ||
-             left_second_mv.as_int == this_second_mv->as_int))
-          m = LEFT4X4;
-      }
+    // Find sad for current vector.
+    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
+                                           ref_y_ptr, ref_y_stride,
+                                           0x7fffffff);
 
-#if CONFIG_NEWBINTRAMODES
-      cost = x->inter_bmode_costs[
-          m == B_CONTEXT_PRED ? m - CONTEXT_PRED_REPLACEMENTS : m];
-#else
-      cost = x->inter_bmode_costs[m];
-#endif
+    // Note if it is the best so far.
+    if (this_sad < best_sad) {
+      best_sad = this_sad;
+      best_index = i;
     }
-
-    mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
-    if (mbmi->second_ref_frame > 0)
-      mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
-
-    x->partition_info->bmi[i].mode = m;
-    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
-    if (mbmi->second_ref_frame > 0)
-      x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
   }
 
-  cost += thismvcost;
-  return cost;
+  // Note the index of the mv that worked best in the reference list.
+  x->mv_best_ref_index[ref_frame] = best_index;
 }
 
-static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
-                                       MACROBLOCK *x,
-                                       int const *labels,
-                                       int which_label,
-                                       int *labelyrate,
-                                       int *distortion,
-                                       ENTROPY_CONTEXT *ta,
-                                       ENTROPY_CONTEXT *tl) {
-  int i;
-  MACROBLOCKD *xd = &x->e_mbd;
-
-  *labelyrate = 0;
-  *distortion = 0;
-  for (i = 0; i < 16; i++) {
-    if (labels[i] == which_label) {
-      const int src_stride = x->plane[0].src.stride;
-      uint8_t* const src =
-          raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
-                                    x->plane[0].src.buf, src_stride);
-      int16_t* const src_diff =
-          raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, i,
-                                    x->plane[0].src_diff);
-      int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
-      uint8_t* const pre =
-          raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
-                                    xd->plane[0].pre[0].buf,
-                                    xd->plane[0].pre[0].stride);
-      uint8_t* const dst =
-          raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
-                                    xd->plane[0].dst.buf,
-                                    xd->plane[0].dst.stride);
-      int thisdistortion;
-
-      vp9_build_inter_predictor(pre,
-                                xd->plane[0].pre[0].stride,
-                                dst,
-                                xd->plane[0].dst.stride,
-                                &xd->mode_info_context->bmi[i].as_mv[0],
-                                &xd->scale_factor[0],
-                                4, 4, 0 /* no avg */, &xd->subpix);
+extern void vp9_calc_ref_probs(int *count, vp9_prob *probs);
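+// Estimate reference frame probabilities for the current frame from the
+// reference usage counts gathered so far, zeroing the branch that is already
+// implied by the (failed) reference frame prediction.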
+static void estimate_curframe_refprobs(VP9_COMP *cpi,
+                                       vp9_prob mod_refprobs[3],
+                                       int pred_ref) {
+  int norm_cnt[MAX_REF_FRAMES];
+  const int *const rfct = cpi->count_mb_ref_frame_usage;
+  int intra_count = rfct[INTRA_FRAME];
+  int last_count  = rfct[LAST_FRAME];
+  int gf_count    = rfct[GOLDEN_FRAME];
+  int arf_count   = rfct[ALTREF_FRAME];
 
-      // TODO(debargha): Make this work properly with the
-      // implicit-compoundinter-weight experiment when implicit
-      // weighting for splitmv modes is turned on.
-      if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-        uint8_t* const second_pre =
-          raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
-                                    xd->plane[0].pre[1].buf,
-                                    xd->plane[0].pre[1].stride);
-        vp9_build_inter_predictor(
-            second_pre, xd->plane[0].pre[1].stride,
-            dst, xd->plane[0].dst.stride,
-            &xd->mode_info_context->bmi[i].as_mv[1],
-            &xd->scale_factor[1], 4, 4, 1,
-            &xd->subpix);
-      }
-
-      vp9_subtract_block(4, 4, src_diff, 16,
-                         src, src_stride,
-                         dst, xd->plane[0].dst.stride);
-      x->fwd_txm4x4(src_diff, coeff, 32);
-      x->quantize_b_4x4(x, i, DCT_DCT, 16);
-      thisdistortion = vp9_block_error(coeff,
-          BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
-      *distortion += thisdistortion;
-      *labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC,
-                                 ta + (i & 3),
-                                 tl + (i >> 2), TX_4X4, 16);
-    }
+  // Work out modified reference frame probabilities to use where prediction
+  // of the reference frame fails
+  if (pred_ref == INTRA_FRAME) {
+    norm_cnt[0] = 0;
+    norm_cnt[1] = last_count;
+    norm_cnt[2] = gf_count;
+    norm_cnt[3] = arf_count;
+    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+    mod_refprobs[0] = 0;    // This branch implicit
+  } else if (pred_ref == LAST_FRAME) {
+    norm_cnt[0] = intra_count;
+    norm_cnt[1] = 0;
+    norm_cnt[2] = gf_count;
+    norm_cnt[3] = arf_count;
+    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+    mod_refprobs[1] = 0;    // This branch implicit
+  } else if (pred_ref == GOLDEN_FRAME) {
+    norm_cnt[0] = intra_count;
+    norm_cnt[1] = last_count;
+    norm_cnt[2] = 0;
+    norm_cnt[3] = arf_count;
+    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+    mod_refprobs[2] = 0;  // This branch implicit
+  } else {
+    norm_cnt[0] = intra_count;
+    norm_cnt[1] = last_count;
+    norm_cnt[2] = gf_count;
+    norm_cnt[3] = 0;
+    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
+    mod_refprobs[2] = 0;  // This branch implicit
   }
-  *distortion >>= 2;
-  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
 }
 
-static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
-                                           MACROBLOCK *x,
-                                           int const *labels,
-                                           int which_label,
-                                           int *labelyrate,
-                                           int *distortion,
-                                           int64_t *otherrd,
-                                           ENTROPY_CONTEXT *ta,
-                                           ENTROPY_CONTEXT *tl) {
-  int i, j;
-  MACROBLOCKD *xd = &x->e_mbd;
-  const int iblock[4] = { 0, 1, 4, 5 };
-  int othercost = 0, otherdist = 0;
-  ENTROPY_CONTEXT tac[4], tlc[4];
+static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1,
+                                     int idx, int val, int weight) {
+  unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0;
+  unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0;
+  // weight is a 16-bit fixed-point fraction (0x10000 == 1.0), so with
+  // rounding this calculates: weight * cost1 + (1.0 - weight) * cost0
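+  // For example, weight == 0x8000 (0.5), cost0 == 256 and cost1 == 512 give
+  // (0x8000 + 0x8000 * 512 + 0x8000 * 256) >> 16 == 384, the midpoint.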
+  return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16;
+}
+
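+// Estimate, for each possible reference frame, the cost (in 1/256-bit units)
+// of signalling that reference for this macroblock, blending last-frame
+// probabilities with the statistics gathered so far on the current frame
+// according to cpi->seg0_progress.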
+static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
+                                     unsigned int *ref_costs) {
+  VP9_COMMON *cm = &cpi->common;
+  MACROBLOCKD *xd = &cpi->mb.e_mbd;
+  vp9_prob *mod_refprobs;
+
+  unsigned int cost;
+  int pred_ref;
+  int pred_flag;
+  int pred_ctx;
+  int i;
+
+  vp9_prob pred_prob, new_pred_prob;
+  int seg_ref_active;
+  int seg_ref_count = 0;
+  seg_ref_active = vp9_segfeature_active(xd,
+                                         segment_id,
+                                         SEG_LVL_REF_FRAME);
 
-  if (otherrd) {
-    memcpy(&tac, ta, sizeof(tac));
-    memcpy(&tlc, tl, sizeof(tlc));
+  if (seg_ref_active) {
+    seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME)  +
+                    vp9_check_segref(xd, segment_id, LAST_FRAME)   +
+                    vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
+                    vp9_check_segref(xd, segment_id, ALTREF_FRAME);
   }
 
-  *distortion = 0;
-  *labelyrate = 0;
-  for (i = 0; i < 4; i++) {
-    int ib = vp9_i8x8_block[i];
+  // Get the predicted reference for this mb
+  pred_ref = vp9_get_pred_ref(cm, xd);
 
-    if (labels[ib] == which_label) {
-      const int use_second_ref =
-          xd->mode_info_context->mbmi.second_ref_frame > 0;
-      int which_mv;
-      const int idx = (ib & 8) + ((ib & 2) << 1);
-      const int src_stride = x->plane[0].src.stride;
-      uint8_t* const src =
-          raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                    x->plane[0].src.buf, src_stride);
-      int16_t* const src_diff =
-          raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                    x->plane[0].src_diff);
-      int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
-      int thisdistortion;
-      uint8_t* const dst =
-          raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                    xd->plane[0].dst.buf,
-                                    xd->plane[0].dst.stride);
-
-      assert(idx < 16);
-      for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
-        uint8_t* const pre =
-            raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
-                                      xd->plane[0].pre[which_mv].buf,
-                                      xd->plane[0].pre[which_mv].stride);
-
-        // TODO(debargha): Make this work properly with the
-        // implicit-compoundinter-weight experiment when implicit
-        // weighting for splitmv modes is turned on.
-        vp9_build_inter_predictor(
-            pre, xd->plane[0].pre[which_mv].stride,
-            dst, xd->plane[0].dst.stride,
-            &xd->mode_info_context->bmi[ib].as_mv[which_mv],
-            &xd->scale_factor[which_mv], 8, 8,
-            which_mv, &xd->subpix);
-      }
+  // Get the context probability for the prediction flag (based on last frame)
+  pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
 
-      vp9_subtract_block(8, 8, src_diff, 16,
-                         src, src_stride,
-                         dst, xd->plane[0].dst.stride);
+  // Predict probability for current frame based on stats so far
+  pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF);
+  new_pred_prob = get_binary_prob(cpi->ref_pred_count[pred_ctx][0],
+                                  cpi->ref_pred_count[pred_ctx][1]);
 
-      if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
-        if (otherrd) {
-          x->fwd_txm8x8(src_diff, coeff, 32);
-          x->quantize_b_8x8(x, idx, DCT_DCT, 16);
-          thisdistortion = vp9_block_error_c(coeff,
-              BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
-          otherdist += thisdistortion;
-          xd->mode_info_context->mbmi.txfm_size = TX_8X8;
-          othercost += cost_coeffs(cm, x, 0, idx, PLANE_TYPE_Y_WITH_DC,
-                                   tac + (i & 1) * 2,
-                                   tlc + (i & 2),
-                                   TX_8X8, 16);
-          xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-        }
-        for (j = 0; j < 4; j += 2) {
-          int16_t* const src_diff =
-              raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16,
-                                        0, ib + iblock[j],
-                                        x->plane[0].src_diff);
-          int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff,
-                                              ib + iblock[j], 16);
-          x->fwd_txm8x4(src_diff, coeff, 32);
-          x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16);
-          thisdistortion = vp9_block_error_c(coeff,
-              BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
-          *distortion += thisdistortion;
-          *labelyrate +=
-              cost_coeffs(cm, x, 0, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
-                          ta + (i & 1) * 2,
-                          tl + (i & 2) + ((j & 2) >> 1),
-                          TX_4X4, 16);
-          *labelyrate +=
-              cost_coeffs(cm, x, 0, ib + iblock[j] + 1,
-                          PLANE_TYPE_Y_WITH_DC,
-                          ta + (i & 1) * 2 + 1,
-                          tl + (i & 2) + ((j & 2) >> 1),
-                          TX_4X4, 16);
+  // Get the set of probabilities to use if prediction fails
+  mod_refprobs = cm->mod_refprobs[pred_ref];
+
+  // For each possible selected reference frame work out a cost.
+  for (i = 0; i < MAX_REF_FRAMES; i++) {
+    if (seg_ref_active && seg_ref_count == 1) {
+      cost = 0;
+    } else {
+      pred_flag = (i == pred_ref);
+
+      // Get the prediction for the current mb
+      cost = weighted_cost(&pred_prob, &new_pred_prob, 0,
+                           pred_flag, cpi->seg0_progress);
+      // Cap at 3 bits (768) if the flag would otherwise cost over 4 bits.
+      if (cost > 1024) cost = 768;
+
+      // For incorrectly predicted cases, add the cost of coding which
+      // reference frame was actually used.
+      if (!pred_flag) {
+        vp9_prob curframe_mod_refprobs[3];
+
+        if (cpi->seg0_progress) {
+          estimate_curframe_refprobs(cpi, curframe_mod_refprobs, pred_ref);
+        } else {
+          vpx_memset(curframe_mod_refprobs, 0, sizeof(curframe_mod_refprobs));
         }
-      } else /* 8x8 */ {
-        if (otherrd) {
-          for (j = 0; j < 4; j += 2) {
-            int16_t* const src_diff =
-                raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16,
-                                          0, ib + iblock[j],
-                                          x->plane[0].src_diff);
-            int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff,
-                                                ib + iblock[j], 16);
-            x->fwd_txm8x4(src_diff, coeff, 32);
-            x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16);
-            thisdistortion = vp9_block_error_c(coeff,
-                BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
-            otherdist += thisdistortion;
-            xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-            othercost +=
-                cost_coeffs(cm, x, 0, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
-                            tac + (i & 1) * 2,
-                            tlc + (i & 2) + ((j & 2) >> 1),
-                            TX_4X4, 16);
-            othercost +=
-                cost_coeffs(cm, x, 0, ib + iblock[j] + 1,
-                            PLANE_TYPE_Y_WITH_DC,
-                            tac + (i & 1) * 2 + 1,
-                            tlc + (i & 2) + ((j & 2) >> 1),
-                            TX_4X4, 16);
-            xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+
+        cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 0,
+                              (i != INTRA_FRAME), cpi->seg0_progress);
+        if (i != INTRA_FRAME) {
+          cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 1,
+                                (i != LAST_FRAME), cpi->seg0_progress);
+          if (i != LAST_FRAME) {
+            cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 2,
+                                  (i != GOLDEN_FRAME), cpi->seg0_progress);
           }
         }
-        x->fwd_txm8x8(src_diff, coeff, 32);
-        x->quantize_b_8x8(x, idx, DCT_DCT, 16);
-        thisdistortion = vp9_block_error_c(coeff,
-            BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
-        *distortion += thisdistortion;
-        *labelyrate += cost_coeffs(cm, x, 0, idx, PLANE_TYPE_Y_WITH_DC,
-                                   ta + (i & 1) * 2,
-                                   tl + (i & 2),
-                                   TX_8X8, 16);
       }
     }
+
+    ref_costs[i] = cost;
   }
-  *distortion >>= 2;
-  if (otherrd) {
-    otherdist >>= 2;
-    *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist);
-  }
-  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
 }
 
-static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
-
+static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
+                                 int mode_index,
+                                 PARTITION_INFO *partition,
+                                 int_mv *ref_mv,
+                                 int_mv *second_ref_mv,
+                                 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
+                                 int64_t txfm_size_diff[NB_TXFM_MODES]) {
+  MACROBLOCKD *const xd = &x->e_mbd;
 
-typedef struct {
-  int_mv *ref_mv, *second_ref_mv;
-  int_mv mvp;
+  // Take a snapshot of the coding context so it can be
+  // restored if we decide to encode this way
+  ctx->skip = x->skip;
+  ctx->best_mode_index = mode_index;
+  vpx_memcpy(&ctx->mic, xd->mode_info_context,
+             sizeof(MODE_INFO));
+  if (partition)
+    vpx_memcpy(&ctx->partition_info, partition,
+               sizeof(PARTITION_INFO));
+  ctx->best_ref_mv.as_int = ref_mv->as_int;
+  ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
 
-  int64_t segment_rd;
-  SPLITMV_PARTITIONING_TYPE segment_num;
-  TX_SIZE txfm_size;
-  int r;
-  int d;
-  int segment_yrate;
-  B_PREDICTION_MODE modes[16];
-  int_mv mvs[16], second_mvs[16];
-  int eobs[16];
+  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
+  ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
+  ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
 
-  int mvthresh;
-  int *mdcounts;
+  memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
+}
 
-  int_mv sv_mvp[4];     // save 4 mvp from 8x8
-  int sv_istep[2];  // save 2 initial step_param for 16x8/8x16
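+// Gather everything needed to predict from one reference frame: per-frame
+// scaling factors, the reference prediction block pointers and an ordered
+// list of candidate motion vectors.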
+static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
+                               int idx, MV_REFERENCE_FRAME frame_type,
+                               enum BlockSize block_size,
+                               int mi_row, int mi_col,
+                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
+                               int_mv frame_near_mv[MAX_REF_FRAMES],
+                               int frame_mdcounts[4][4],
+                               YV12_BUFFER_CONFIG yv12_mb[4],
+                               struct scale_factors scale[MAX_REF_FRAMES]) {
+  VP9_COMMON *cm = &cpi->common;
+  YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+  int use_prev_in_find_mv_refs;
 
-} BEST_SEG_INFO;
+  // set up scaling factors
+  scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
+  scale[frame_type].x_offset_q4 =
+      (mi_col * MI_SIZE * scale[frame_type].x_num /
+       scale[frame_type].x_den) & 0xf;
+  scale[frame_type].y_offset_q4 =
+      (mi_row * MI_SIZE * scale[frame_type].y_num /
+       scale[frame_type].y_den) & 0xf;
 
-static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
-                                    BEST_SEG_INFO *bsi,
-                                    SPLITMV_PARTITIONING_TYPE segmentation,
-                                    TX_SIZE tx_size, int64_t *otherrds,
-                                    int64_t *rds, int *completed,
-                                    /* 16 = n_blocks */
-                                    int_mv seg_mvs[16 /* n_blocks */]
-                                                  [MAX_REF_FRAMES - 1]) {
-  int i, j;
-  int const *labels;
-  int br = 0, bd = 0;
-  B_PREDICTION_MODE this_mode;
-  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
+  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
+  // use the UV scaling factors.
+  setup_pred_block(&yv12_mb[frame_type], yv12, mi_row, mi_col,
+                   &scale[frame_type], &scale[frame_type]);
 
-  int label_count;
-  int64_t this_segment_rd = 0, other_segment_rd;
-  int label_mv_thresh;
-  int rate = 0;
-  int sbr = 0, sbd = 0;
-  int segmentyrate = 0;
-  int best_eobs[16] = { 0 };
+  // Gets an initial list of candidate vectors from neighbours and orders them
+  use_prev_in_find_mv_refs = cm->width == cm->last_width &&
+                             cm->height == cm->last_height &&
+                             !cpi->common.error_resilient_mode;
+  vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
+                   use_prev_in_find_mv_refs ? xd->prev_mode_info_context : NULL,
+                   frame_type,
+                   mbmi->ref_mvs[frame_type],
+                   cpi->common.ref_frame_sign_bias);
 
-  vp9_variance_fn_ptr_t *v_fn_ptr;
+  // Candidate refinement carried out at encoder and decoder
+  vp9_find_best_ref_mvs(xd,
+                        mbmi->ref_mvs[frame_type],
+                        &frame_nearest_mv[frame_type],
+                        &frame_near_mv[frame_type]);
 
-  ENTROPY_CONTEXT t_above[4], t_left[4];
-  ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
+  // Further refinement, carried out on the encoder side only: test the top
+  // few candidates in full and choose the best as the centre point for
+  // subsequent searches.
+  // The current implementation doesn't support scaling, so this step is
+  // skipped when the reference frame is scaled.
+  if (scale[frame_type].x_num == scale[frame_type].x_den &&
+      scale[frame_type].y_num == scale[frame_type].y_den)
+    mv_pred(cpi, x, yv12_mb[frame_type].y_buffer, yv12->y_stride,
+            frame_type, block_size);
+}
 
-  vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
-  vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
 
-  v_fn_ptr = &cpi->fn_ptr[segmentation];
-  labels = vp9_mbsplits[segmentation];
-  label_count = vp9_mbsplit_count[segmentation];
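+// Map a block's width and height in pixels onto the enum BlockSize used to
+// index the variance / SAD function tables.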
+static enum BlockSize get_block_size(int bw, int bh) {
+  if (bw == 4 && bh == 4)
+    return BLOCK_4X4;
 
-  // 64 makes this threshold really big effectively
-  // making it so that we very rarely check mvs on
-  // segments.   setting this to 1 would make mv thresh
-  // roughly equal to what it is for macroblocks
-  label_mv_thresh = 1 * bsi->mvthresh / label_count;
+  if (bw == 4 && bh == 8)
+    return BLOCK_4X8;
 
-  // Segmentation method overheads
-  rate = cost_token(vp9_mbsplit_tree, vp9_mbsplit_probs,
-                    vp9_mbsplit_encodings + segmentation);
-  rate += vp9_cost_mv_ref(cpi, SPLITMV,
-                          mbmi->mb_mode_context[mbmi->ref_frame]);
-  this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
-  br += rate;
-  other_segment_rd = this_segment_rd;
+  if (bw == 8 && bh == 4)
+    return BLOCK_8X4;
 
-  mbmi->txfm_size = tx_size;
-  for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {
-    int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
-    int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
-    B_PREDICTION_MODE mode_selected = ZERO4X4;
-    int bestlabelyrate = 0;
+  if (bw == 8 && bh == 8)
+    return BLOCK_8X8;
 
-    // search for the best motion vector on this segment
-    for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
-      int64_t this_rd, other_rd;
-      int distortion;
-      int labelyrate;
-      ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
-
-      vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
-      vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
+  if (bw == 8 && bh == 16)
+    return BLOCK_8X16;
 
-      // motion search for newmv (single predictor case only)
-      if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) {
-        int sseshift, n;
-        int step_param = 0;
-        int further_steps;
-        int thissme, bestsme = INT_MAX;
-        const struct buf_2d orig_src = x->plane[0].src;
-        const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0];
+  if (bw == 16 && bh == 8)
+    return BLOCK_16X8;
 
-        /* Is the best so far sufficiently good that we cant justify doing
-         * and new motion search. */
-        if (best_label_rd < label_mv_thresh)
-          break;
+  if (bw == 16 && bh == 16)
+    return BLOCK_16X16;
 
-        if (cpi->compressor_speed) {
-          if (segmentation == PARTITIONING_8X16 ||
-              segmentation == PARTITIONING_16X8) {
-            bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
-            if (i == 1 && segmentation == PARTITIONING_16X8)
-              bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
+  if (bw == 32 && bh == 32)
+    return BLOCK_32X32;
 
-            step_param = bsi->sv_istep[i];
-          }
+  if (bw == 32 && bh == 16)
+    return BLOCK_32X16;
 
-          // use previous block's result as next block's MV predictor.
-          if (segmentation == PARTITIONING_4X4 && i > 0) {
-            bsi->mvp.as_int =
-              x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
-            if (i == 4 || i == 8 || i == 12)
-              bsi->mvp.as_int =
-                x->e_mbd.mode_info_context->bmi[i - 4].as_mv[0].as_int;
-            step_param = 2;
-          }
-        }
+  if (bw == 16 && bh == 32)
+    return BLOCK_16X32;
 
-        further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
+  if (bw == 64 && bh == 32)
+    return BLOCK_64X32;
 
-        {
-          int sadpb = x->sadperbit4;
-          int_mv mvp_full;
+  if (bw == 32 && bh == 64)
+    return BLOCK_32X64;
 
-          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
-          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
+  if (bw == 64 && bh == 64)
+    return BLOCK_64X64;
 
-          // find first label
-          n = vp9_mbsplit_offset[segmentation][i];
+  assert(0);
+  return -1;
+}
 
-          // adjust src pointer for this segment
-          x->plane[0].src.buf =
-              raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_MB16X16, 0, n,
-                                        x->plane[0].src.buf,
-                                        x->plane[0].src.stride);
-          assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0xf) == 0);
-          x->e_mbd.plane[0].pre[0].buf =
-              raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_MB16X16, 0, n,
-                                        x->e_mbd.plane[0].pre[0].buf,
-                                        x->e_mbd.plane[0].pre[0].stride);
+static void model_rd_from_var_lapndz(int var, int n, int qstep,
+                                     int *rate, int *dist) {
+  // This function models the rate and distortion for a Laplacian
+  // source with given variance when quantized with a uniform quantizer
+  // with given stepsize. The closed form expressions are in:
+  // Hang and Chen, "Source Model for transform video coder and its
+  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
+  // Sys. for Video Tech., April 1997.
+  // The function is implemented as piecewise approximation to the
+  // exact computation.
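+  // Roughly, for a fine quantizer (x = qstep / s much less than 1) the rate
+  // behaves like -log2(x) plus a constant (note 1.4423 ~ 1/ln(2) below), and
+  // for a very coarse quantizer the rate tends to zero while the distortion
+  // saturates at the source variance.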
+  // TODO(debargha): Implement the functions by interpolating from a
+  // look-up table
+  vp9_clear_system_state();
+  {
+    double D, R;
+    double s2 = (double) var / n;
+    double s = sqrt(s2);
+    double x = qstep / s;
+    if (x > 1.0) {
+      double y = exp(-x / 2);
+      double y2 = y * y;
+      D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
+      R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
+    } else {
+      double x2 = x * x;
+      D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
+      if (x > 0.125)
+        R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
+                 0.1626989668625);
+      else
+        R = -1.442252874826093 * log(x) + 1.944647760719664;
+    }
+    if (R < 0) {
+      *rate = 0;
+      *dist = var;
+    } else {
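+      // R is bits per sample and D is the per-sample distortion normalized by
+      // the variance s2; scale by the sample count n (and by 256 for the rate
+      // to match the encoder's cost units), rounding via the +0.5.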
+      *rate = (n * R * 256 + 0.5);
+      *dist = (n * D * s2 + 0.5);
+    }
+  }
+  vp9_clear_system_state();
+}
 
-          bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
-                                           sadpb, further_steps, 0, v_fn_ptr,
-                                           bsi->ref_mv, &mode_mv[NEW4X4]);
+static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
+                            MACROBLOCK *x, MACROBLOCKD *xd,
+                            int *out_rate_sum, int *out_dist_sum) {
+  // Note our transform coeffs are 8 times an orthogonal transform.
+  // Hence quantizer step is also 8 times. To get effective quantizer
+  // we need to divide by 8 before sending to modeling function.
+  unsigned int sse, var;
+  int i, rate_sum = 0, dist_sum = 0;
 
-          sseshift = segmentation_to_sseshift[segmentation];
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    struct macroblock_plane *const p = &x->plane[i];
+    struct macroblockd_plane *const pd = &xd->plane[i];
 
-          // Should we do a full search (best quality only)
-          if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
-            /* Check if mvp_full is within the range. */
-            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
-                     x->mv_row_min, x->mv_row_max);
+    const int bwl = b_width_log2(bsize) - pd->subsampling_x;
+    const int bhl = b_height_log2(bsize) - pd->subsampling_y;
+    const enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl);
+    int rate, dist;
+    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+                             pd->dst.buf, pd->dst.stride, &sse);
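+    // 16 << (bwl + bhl) is the number of pixels in this plane's block; the AC
+    // dequant step is divided by 8 to undo the transform scaling noted above.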
+    model_rd_from_var_lapndz(var, 16 << (bwl + bhl),
+                             pd->dequant[1] >> 3, &rate, &dist);
 
-            thissme = cpi->full_search_sad(x, &mvp_full,
-                                           sadpb, 16, v_fn_ptr,
-                                           x->nmvjointcost, x->mvcost,
-                                           bsi->ref_mv,
-                                           n);
+    rate_sum += rate;
+    dist_sum += dist;
+  }
 
-            if (thissme < bestsme) {
-              bestsme = thissme;
-              mode_mv[NEW4X4].as_int =
-                x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
-            } else {
-              /* The full search result is actually worse so re-instate the
-               * previous best vector */
-              x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int =
-                mode_mv[NEW4X4].as_int;
-            }
-          }
-        }
+  *out_rate_sum = rate_sum;
+  *out_dist_sum = dist_sum;
+}
 
-        if (bestsme < INT_MAX) {
-          int distortion;
-          unsigned int sse;
-          cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4],
-                                       bsi->ref_mv, x->errorperbit, v_fn_ptr,
-                                       x->nmvjointcost, x->mvcost,
-                                       &distortion, &sse);
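+// Map a luma block size onto the corresponding chroma block size, i.e. both
+// dimensions halved (assuming 4:2:0 subsampling).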
+static enum BlockSize y_to_uv_block_size(enum BlockSize bs) {
+  switch (bs) {
+    case BLOCK_64X64: return BLOCK_32X32;
+    case BLOCK_64X32: return BLOCK_32X16;
+    case BLOCK_32X64: return BLOCK_16X32;
+    case BLOCK_32X32: return BLOCK_16X16;
+    case BLOCK_32X16: return BLOCK_16X8;
+    case BLOCK_16X32: return BLOCK_8X16;
+    case BLOCK_16X16: return BLOCK_8X8;
+    case BLOCK_16X8:  return BLOCK_8X4;
+    case BLOCK_8X16:  return BLOCK_4X8;
+    case BLOCK_8X8:   return BLOCK_4X4;
+    default:
+      assert(0);
+      return -1;
+  }
+}
 
-          // safe motion search result for use in compound prediction
-          seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
-        }
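+// Translate a BLOCK_SIZE_TYPE into the equivalent enum BlockSize for the
+// whole (luma) block.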
+static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) {
+  switch (bs) {
+    case BLOCK_SIZE_SB64X64: return BLOCK_64X64;
+    case BLOCK_SIZE_SB64X32: return BLOCK_64X32;
+    case BLOCK_SIZE_SB32X64: return BLOCK_32X64;
+    case BLOCK_SIZE_SB32X32: return BLOCK_32X32;
+    case BLOCK_SIZE_SB32X16: return BLOCK_32X16;
+    case BLOCK_SIZE_SB16X32: return BLOCK_16X32;
+    case BLOCK_SIZE_MB16X16: return BLOCK_16X16;
+    case BLOCK_SIZE_SB16X8:  return BLOCK_16X8;
+    case BLOCK_SIZE_SB8X16:  return BLOCK_8X16;
+    case BLOCK_SIZE_SB8X8:   return BLOCK_8X8;
+    default:
+      assert(0);
+      return -1;
+  }
+}
 
-        // restore src pointers
-        x->plane[0].src = orig_src;
-        x->e_mbd.plane[0].pre[0] = orig_pre;
-      } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) {
-        /* NEW4X4 */
-        /* motion search not completed? Then skip newmv for this block with
-         * comppred */
-        if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
-            seg_mvs[i][mbmi->ref_frame        - 1].as_int == INVALID_MV) {
-          continue;
-        }
-      }
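+// Evaluate the rate-distortion cost of coding this block with its current
+// inter mode and reference frame(s), filling in the rate, distortion and
+// skippability outputs and returning the resulting RD cost.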
+static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
+                                 BLOCK_SIZE_TYPE bsize,
+                                 int mdcounts[4], int64_t txfm_cache[],
+                                 int *rate2, int *distortion, int *skippable,
+                                 int *compmode_cost,
+#if CONFIG_COMP_INTERINTRA_PRED
+                                 int *compmode_interintra_cost,
+#endif
+                                 int *rate_y, int *distortion_y,
+                                 int *rate_uv, int *distortion_uv,
+                                 int *mode_excluded, int *disable_skip,
+                                 int mode_index,
+                                 INTERPOLATIONFILTERTYPE *best_filter,
+                                 int_mv frame_mv[MB_MODE_COUNT]
+                                                [MAX_REF_FRAMES],
+                                 YV12_BUFFER_CONFIG *scaled_ref_frame,
+                                 int mi_row, int mi_col) {
+  const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
+  const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
+  const enum BlockSize uv_block_size = y_to_uv_block_size(block_size);
+  VP9_COMMON *cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  const int is_comp_pred = (mbmi->second_ref_frame > 0);
+#if CONFIG_COMP_INTERINTRA_PRED
+  const int is_comp_interintra_pred = (mbmi->second_ref_frame == INTRA_FRAME);
+#endif
+  const int num_refs = is_comp_pred ? 2 : 1;
+  const int this_mode = mbmi->mode;
+  int i;
+  int refs[2] = { mbmi->ref_frame,
+                  (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };
+  int_mv cur_mv[2];
+  int_mv ref_mv[2];
+  int64_t this_rd = 0;
+  unsigned char tmp_ybuf[64 * 64];
+  unsigned char tmp_ubuf[32 * 32];
+  unsigned char tmp_vbuf[32 * 32];
+  int pred_exists = 0;
+  int interpolating_intpel_seen = 0;
+  int intpel_mv;
+  int64_t rd, best_rd = INT64_MAX;
 
-      rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
-                         &second_mode_mv[this_mode], seg_mvs[i],
-                         bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
-                         x->mvcost, cpi);
+  switch (this_mode) {
+    case NEWMV:
+      ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
+      ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
 
-      // Trap vectors that reach beyond the UMV borders
-      if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
-          ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
-          ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
-          ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
-        continue;
-      }
-      if (mbmi->second_ref_frame > 0 &&
-          mv_check_bounds(x, &second_mode_mv[this_mode]))
-        continue;
-
-      if (segmentation == PARTITIONING_4X4) {
-        this_rd = encode_inter_mb_segment(&cpi->common,
-                                          x, labels, i, &labelyrate,
-                                          &distortion, t_above_s, t_left_s);
-        other_rd = this_rd;
-      } else {
-        this_rd = encode_inter_mb_segment_8x8(&cpi->common,
-                                              x, labels, i, &labelyrate,
-                                              &distortion, &other_rd,
-                                              t_above_s, t_left_s);
-      }
-      this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
-      rate += labelyrate;
-
-      if (this_rd < best_label_rd) {
-        sbr = rate;
-        sbd = distortion;
-        bestlabelyrate = labelyrate;
-        mode_selected = this_mode;
-        best_label_rd = this_rd;
-        if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
-          for (j = 0; j < 16; j++)
-            if (labels[j] == i)
-              best_eobs[j] = x->e_mbd.plane[0].eobs[j];
-        } else {
-          for (j = 0; j < 4; j++) {
-            int ib = vp9_i8x8_block[j], idx = j * 4;
-
-            if (labels[ib] == i)
-              best_eobs[idx] = x->e_mbd.plane[0].eobs[idx];
-          }
-        }
-        if (other_rd < best_other_rd)
-          best_other_rd = other_rd;
-
-        vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
-        vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
-
-      }
-    } /*for each 4x4 mode*/
-
-    vpx_memcpy(t_above, t_above_b, sizeof(t_above));
-    vpx_memcpy(t_left, t_left_b, sizeof(t_left));
-
-    labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
-                &second_mode_mv[mode_selected], seg_mvs[i],
-                bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
-                x->mvcost, cpi);
-
-    br += sbr;
-    bd += sbd;
-    segmentyrate += bestlabelyrate;
-    this_segment_rd += best_label_rd;
-    other_segment_rd += best_other_rd;
-    if (rds)
-      rds[i] = this_segment_rd;
-    if (otherrds)
-      otherrds[i] = other_segment_rd;
-  } /* for each label */
-
-  if (this_segment_rd < bsi->segment_rd) {
-    bsi->r = br;
-    bsi->d = bd;
-    bsi->segment_yrate = segmentyrate;
-    bsi->segment_rd = this_segment_rd;
-    bsi->segment_num = segmentation;
-    bsi->txfm_size = mbmi->txfm_size;
-
-    // store everything needed to come back to this!!
-    for (i = 0; i < 16; i++) {
-      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
-      if (mbmi->second_ref_frame > 0)
-        bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
-      bsi->modes[i] = x->partition_info->bmi[i].mode;
-      bsi->eobs[i] = best_eobs[i];
-    }
-  }
-
-  if (completed) {
-    *completed = i;
-  }
-}
-
-static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x,
-                             BEST_SEG_INFO *bsi,
-                             unsigned int segmentation,
-                             /* 16 = n_blocks */
-                             int_mv seg_mvs[16][MAX_REF_FRAMES - 1],
-                             int64_t txfm_cache[NB_TXFM_MODES]) {
-  int i, n, c = vp9_mbsplit_count[segmentation];
-
-  if (segmentation == PARTITIONING_4X4) {
-    int64_t rd[16];
-
-    rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL,
-                            rd, &n, seg_mvs);
-    if (n == c) {
-      for (i = 0; i < NB_TXFM_MODES; i++) {
-        if (rd[c - 1] < txfm_cache[i])
-          txfm_cache[i] = rd[c - 1];
-      }
-    }
-  } else {
-    int64_t diff, base_rd;
-    int cost4x4 = vp9_cost_bit(cpi->common.prob_tx[0], 0);
-    int cost8x8 = vp9_cost_bit(cpi->common.prob_tx[0], 1);
-
-    if (cpi->common.txfm_mode == TX_MODE_SELECT) {
-      int64_t rd4x4[4], rd8x8[4];
-      int n4x4, n8x8, nmin;
-      BEST_SEG_INFO bsi4x4, bsi8x8;
-
-      /* factor in cost of cost4x4/8x8 in decision */
-      vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi));
-      vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi));
-      rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation,
-                              TX_4X4, NULL, rd4x4, &n4x4, seg_mvs);
-      rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation,
-                              TX_8X8, NULL, rd8x8, &n8x8, seg_mvs);
-      if (bsi4x4.segment_num == segmentation) {
-        bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
-        if (bsi4x4.segment_rd < bsi->segment_rd)
-          vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi));
-      }
-      if (bsi8x8.segment_num == segmentation) {
-        bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
-        if (bsi8x8.segment_rd < bsi->segment_rd)
-          vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi));
-      }
-      n = n4x4 > n8x8 ? n4x4 : n8x8;
-      if (n == c) {
-        nmin = n4x4 < n8x8 ? n4x4 : n8x8;
-        diff = rd8x8[nmin - 1] - rd4x4[nmin - 1];
-        if (n == n4x4) {
-          base_rd = rd4x4[c - 1];
-        } else {
-          base_rd = rd8x8[c - 1] - diff;
-        }
-      }
-    } else {
-      int64_t rd[4], otherrd[4];
-
-      if (cpi->common.txfm_mode == ONLY_4X4) {
-        rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd,
-                                rd, &n, seg_mvs);
-        if (n == c) {
-          base_rd = rd[c - 1];
-          diff = otherrd[c - 1] - rd[c - 1];
-        }
-      } else /* use 8x8 transform */ {
-        rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd,
-                                rd, &n, seg_mvs);
-        if (n == c) {
-          diff = rd[c - 1] - otherrd[c - 1];
-          base_rd = otherrd[c - 1];
-        }
-      }
-    }
-
-    if (n == c) {
-      if (base_rd < txfm_cache[ONLY_4X4]) {
-        txfm_cache[ONLY_4X4] = base_rd;
-      }
-      if (base_rd + diff < txfm_cache[ALLOW_8X8]) {
-        txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] =
-            txfm_cache[ALLOW_32X32] = base_rd + diff;
-      }
-      if (diff < 0) {
-        base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
-      } else {
-        base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
-      }
-      if (base_rd < txfm_cache[TX_MODE_SELECT]) {
-        txfm_cache[TX_MODE_SELECT] = base_rd;
-      }
-    }
-  }
-}
-
-static INLINE void cal_step_param(int sr, int *sp) {
-  int step = 0;
-
-  if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
-  else if (sr < 1) sr = 1;
-
-  while (sr >>= 1)
-    step++;
-
-  *sp = MAX_MVSEARCH_STEPS - 1 - step;
-}
-
-static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
-                                       int_mv *best_ref_mv,
-                                       int_mv *second_best_ref_mv,
-                                       int64_t best_rd,
-                                       int *mdcounts,
-                                       int *returntotrate,
-                                       int *returnyrate,
-                                       int *returndistortion,
-                                       int *skippable, int mvthresh,
-                                       int_mv seg_mvs[NB_PARTITIONINGS]
-                                                     [16 /* n_blocks */]
-                                                     [MAX_REF_FRAMES - 1],
-                                       int64_t txfm_cache[NB_TXFM_MODES]) {
-  int i;
-  BEST_SEG_INFO bsi;
-  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
-
-  vpx_memset(&bsi, 0, sizeof(bsi));
-  for (i = 0; i < NB_TXFM_MODES; i++)
-    txfm_cache[i] = INT64_MAX;
-
-  bsi.segment_rd = best_rd;
-  bsi.ref_mv = best_ref_mv;
-  bsi.second_ref_mv = second_best_ref_mv;
-  bsi.mvp.as_int = best_ref_mv->as_int;
-  bsi.mvthresh = mvthresh;
-  bsi.mdcounts = mdcounts;
-  bsi.txfm_size = TX_4X4;
-
-  for (i = 0; i < 16; i++)
-    bsi.modes[i] = ZERO4X4;
-
-  if (cpi->compressor_speed == 0) {
-    /* for now, we will keep the original segmentation order
-       when in best quality mode */
-    rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
-                     seg_mvs[PARTITIONING_16X8], txfm_cache);
-    rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
-                     seg_mvs[PARTITIONING_8X16], txfm_cache);
-    rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
-                     seg_mvs[PARTITIONING_8X8], txfm_cache);
-    rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
-                     seg_mvs[PARTITIONING_4X4], txfm_cache);
-  } else {
-    int sr;
-
-    rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
-                     seg_mvs[PARTITIONING_8X8], txfm_cache);
-
-    if (bsi.segment_rd < best_rd) {
-      int tmp_col_min = x->mv_col_min;
-      int tmp_col_max = x->mv_col_max;
-      int tmp_row_min = x->mv_row_min;
-      int tmp_row_max = x->mv_row_max;
-
-      vp9_clamp_mv_min_max(x, best_ref_mv);
-
-      /* Get 8x8 result */
-      bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
-      bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
-      bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
-      bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
-
-      /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
-       * according to the closeness of 2 MV. */
-      /* block 8X16 */
-      sr = MAX(abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row) >> 3,
-               abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col) >> 3);
-      cal_step_param(sr, &bsi.sv_istep[0]);
-
-      sr = MAX(abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row) >> 3,
-               abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col) >> 3);
-      cal_step_param(sr, &bsi.sv_istep[1]);
-
-      rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
-                       seg_mvs[PARTITIONING_8X16], txfm_cache);
-
-      /* block 16X8 */
-      sr = MAX(abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row) >> 3,
-               abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col) >> 3);
-      cal_step_param(sr, &bsi.sv_istep[0]);
-
-      sr = MAX(abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row) >> 3,
-               abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col) >> 3);
-      cal_step_param(sr, &bsi.sv_istep[1]);
-
-      rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
-                       seg_mvs[PARTITIONING_16X8], txfm_cache);
-
-      /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
-      /* Not skip 4x4 if speed=0 (good quality) */
-      if (cpi->sf.no_skip_block4x4_search ||
-          bsi.segment_num == PARTITIONING_8X8) {
-        /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
-        bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
-        rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
-                         seg_mvs[PARTITIONING_4X4], txfm_cache);
-      }
-
-      /* restore UMV window */
-      x->mv_col_min = tmp_col_min;
-      x->mv_col_max = tmp_col_max;
-      x->mv_row_min = tmp_row_min;
-      x->mv_row_max = tmp_row_max;
-    }
-  }
-
-  /* set it to the best */
-  for (i = 0; i < 16; i++) {
-    x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int;
-    if (mbmi->second_ref_frame > 0)
-      x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int =
-        bsi.second_mvs[i].as_int;
-    x->e_mbd.plane[0].eobs[i] = bsi.eobs[i];
-  }
-
-  /* save partitions */
-  mbmi->txfm_size = bsi.txfm_size;
-  mbmi->partitioning = bsi.segment_num;
-  x->partition_info->count = vp9_mbsplit_count[bsi.segment_num];
-
-  for (i = 0; i < x->partition_info->count; i++) {
-    int j;
-
-    j = vp9_mbsplit_offset[bsi.segment_num][i];
-
-    x->partition_info->bmi[i].mode = bsi.modes[j];
-    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
-    if (mbmi->second_ref_frame > 0)
-      x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[j].as_mv;
-  }
-  /*
-   * used to set mbmi->mv.as_int
-   */
-  x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
-  if (mbmi->second_ref_frame > 0)
-    x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int;
-
-  *returntotrate = bsi.r;
-  *returndistortion = bsi.d;
-  *returnyrate = bsi.segment_yrate;
-  *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_MB16X16);
-
-  return (int)(bsi.segment_rd);
-}
-#endif  // !CONFIG_SB8X8
-
-static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
-                    uint8_t *ref_y_buffer, int ref_y_stride,
-                    int ref_frame, enum BlockSize block_size ) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
-  int_mv this_mv;
-  int i;
-  int zero_seen = 0;
-  int best_index = 0;
-  int best_sad = INT_MAX;
-  int this_sad = INT_MAX;
-
-  uint8_t *src_y_ptr = x->plane[0].src.buf;
-  uint8_t *ref_y_ptr;
-  int row_offset, col_offset;
-
-  // Get the sad for each candidate reference mv
-  for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
-    this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
-
-    // The list is at an end if we see 0 for a second time.
-    if (!this_mv.as_int && zero_seen)
-      break;
-    zero_seen = zero_seen || !this_mv.as_int;
-
-    row_offset = this_mv.as_mv.row >> 3;
-    col_offset = this_mv.as_mv.col >> 3;
-    ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
-
-    // Find sad for current vector.
-    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
-                                           ref_y_ptr, ref_y_stride,
-                                           0x7fffffff);
-
-    // Note if it is the best so far.
-    if (this_sad < best_sad) {
-      best_sad = this_sad;
-      best_index = i;
-    }
-  }
-
-  // Note the index of the mv that worked best in the reference list.
-  x->mv_best_ref_index[ref_frame] = best_index;
-}
-
-#if !CONFIG_SB8X8
-static void set_i8x8_block_modes(MACROBLOCK *x, int modes[4]) {
-  int i;
-  MACROBLOCKD *xd = &x->e_mbd;
-  for (i = 0; i < 4; i++) {
-    int ib = vp9_i8x8_block[i];
-    xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[i];
-    xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[i];
-    xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[i];
-    xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[i];
-    // printf("%d,%d,%d,%d\n",
-    //       modes[0], modes[1], modes[2], modes[3]);
-  }
-}
-#endif
-
-extern void vp9_calc_ref_probs(int *count, vp9_prob *probs);
-static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3], int pred_ref) {
-  int norm_cnt[MAX_REF_FRAMES];
-  const int *const rfct = cpi->count_mb_ref_frame_usage;
-  int intra_count = rfct[INTRA_FRAME];
-  int last_count  = rfct[LAST_FRAME];
-  int gf_count    = rfct[GOLDEN_FRAME];
-  int arf_count   = rfct[ALTREF_FRAME];
-
-  // Work out modified reference frame probabilities to use where prediction
-  // of the reference frame fails
-  if (pred_ref == INTRA_FRAME) {
-    norm_cnt[0] = 0;
-    norm_cnt[1] = last_count;
-    norm_cnt[2] = gf_count;
-    norm_cnt[3] = arf_count;
-    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
-    mod_refprobs[0] = 0;    // This branch implicit
-  } else if (pred_ref == LAST_FRAME) {
-    norm_cnt[0] = intra_count;
-    norm_cnt[1] = 0;
-    norm_cnt[2] = gf_count;
-    norm_cnt[3] = arf_count;
-    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
-    mod_refprobs[1] = 0;    // This branch implicit
-  } else if (pred_ref == GOLDEN_FRAME) {
-    norm_cnt[0] = intra_count;
-    norm_cnt[1] = last_count;
-    norm_cnt[2] = 0;
-    norm_cnt[3] = arf_count;
-    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
-    mod_refprobs[2] = 0;  // This branch implicit
-  } else {
-    norm_cnt[0] = intra_count;
-    norm_cnt[1] = last_count;
-    norm_cnt[2] = gf_count;
-    norm_cnt[3] = 0;
-    vp9_calc_ref_probs(norm_cnt, mod_refprobs);
-    mod_refprobs[2] = 0;  // This branch implicit
-  }
-}
-
-static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1,
-                                     int idx, int val, int weight) {
-  unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0;
-  unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0;
-  // weight is 16-bit fixed point, so this basically calculates:
-  // 0.5 + weight * cost1 + (1.0 - weight) * cost0
-  return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16;
-}
-
-static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int *ref_costs) {
-  VP9_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &cpi->mb.e_mbd;
-  vp9_prob *mod_refprobs;
-
-  unsigned int cost;
-  int pred_ref;
-  int pred_flag;
-  int pred_ctx;
-  int i;
-
-  vp9_prob pred_prob, new_pred_prob;
-  int seg_ref_active;
-  int seg_ref_count = 0;
-  seg_ref_active = vp9_segfeature_active(xd,
-                                         segment_id,
-                                         SEG_LVL_REF_FRAME);
-
-  if (seg_ref_active) {
-    seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME)  +
-                    vp9_check_segref(xd, segment_id, LAST_FRAME)   +
-                    vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
-                    vp9_check_segref(xd, segment_id, ALTREF_FRAME);
-  }
-
-  // Get the predicted reference for this mb
-  pred_ref = vp9_get_pred_ref(cm, xd);
-
-  // Get the context probability for the prediction flag (based on last frame)
-  pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
-
-  // Predict probability for current frame based on stats so far
-  pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF);
-  new_pred_prob = get_binary_prob(cpi->ref_pred_count[pred_ctx][0],
-                                  cpi->ref_pred_count[pred_ctx][1]);
-
-  // Get the set of probabilities to use if prediction fails
-  mod_refprobs = cm->mod_refprobs[pred_ref];
-
-  // For each possible selected reference frame work out a cost.
-  for (i = 0; i < MAX_REF_FRAMES; i++) {
-    if (seg_ref_active && seg_ref_count == 1) {
-      cost = 0;
-    } else {
-      pred_flag = (i == pred_ref);
-
-      // Get the prediction for the current mb
-      cost = weighted_cost(&pred_prob, &new_pred_prob, 0,
-                           pred_flag, cpi->seg0_progress);
-      if (cost > 1024) cost = 768; // i.e. account for 4 bits max.
-
-      // for incorrectly predicted cases
-      if (! pred_flag) {
-        vp9_prob curframe_mod_refprobs[3];
-
-        if (cpi->seg0_progress) {
-          estimate_curframe_refprobs(cpi, curframe_mod_refprobs, pred_ref);
-        } else {
-          vpx_memset(curframe_mod_refprobs, 0, sizeof(curframe_mod_refprobs));
-        }
-
-        cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 0,
-                              (i != INTRA_FRAME), cpi->seg0_progress);
-        if (i != INTRA_FRAME) {
-          cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 1,
-                                (i != LAST_FRAME), cpi->seg0_progress);
-          if (i != LAST_FRAME) {
-            cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 2,
-                                  (i != GOLDEN_FRAME), cpi->seg0_progress);
-          }
-        }
-      }
-    }
-
-    ref_costs[i] = cost;
-  }
-}
-
-static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
-                                 int mode_index,
-                                 PARTITION_INFO *partition,
-                                 int_mv *ref_mv,
-                                 int_mv *second_ref_mv,
-                                 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
-                                 int64_t txfm_size_diff[NB_TXFM_MODES]) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-  // Take a snapshot of the coding context so it can be
-  // restored if we decide to encode this way
-  ctx->skip = x->skip;
-  ctx->best_mode_index = mode_index;
-  vpx_memcpy(&ctx->mic, xd->mode_info_context,
-             sizeof(MODE_INFO));
-  if (partition)
-    vpx_memcpy(&ctx->partition_info, partition,
-               sizeof(PARTITION_INFO));
-  ctx->best_ref_mv.as_int = ref_mv->as_int;
-  ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
-
-  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
-  ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
-  ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
-
-  memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
-}
-
-static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
-                               int idx, MV_REFERENCE_FRAME frame_type,
-                               enum BlockSize block_size,
-                               int mi_row, int mi_col,
-                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
-                               int_mv frame_near_mv[MAX_REF_FRAMES],
-                               int frame_mdcounts[4][4],
-                               YV12_BUFFER_CONFIG yv12_mb[4],
-                               struct scale_factors scale[MAX_REF_FRAMES]) {
-  VP9_COMMON *cm = &cpi->common;
-  YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
-  int use_prev_in_find_mv_refs;
-
-  // set up scaling factors
-  scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
-  scale[frame_type].x_offset_q4 =
-      (mi_col * MI_SIZE * scale[frame_type].x_num /
-       scale[frame_type].x_den) & 0xf;
-  scale[frame_type].y_offset_q4 =
-      (mi_row * MI_SIZE * scale[frame_type].y_num /
-       scale[frame_type].y_den) & 0xf;
-
-  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
-  // use the UV scaling factors.
-  setup_pred_block(&yv12_mb[frame_type], yv12, mi_row, mi_col,
-                   &scale[frame_type], &scale[frame_type]);
-
-  // Gets an initial list of candidate vectors from neighbours and orders them
-  use_prev_in_find_mv_refs = cm->width == cm->last_width &&
-                             cm->height == cm->last_height &&
-                             !cpi->common.error_resilient_mode;
-  vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
-                   use_prev_in_find_mv_refs ? xd->prev_mode_info_context : NULL,
-                   frame_type,
-                   mbmi->ref_mvs[frame_type],
-                   cpi->common.ref_frame_sign_bias);
-
-  // Candidate refinement carried out at encoder and decoder
-  vp9_find_best_ref_mvs(xd,
-                        mbmi->ref_mvs[frame_type],
-                        &frame_nearest_mv[frame_type],
-                        &frame_near_mv[frame_type]);
-
-  // Further refinement that is encode side only to test the top few candidates
-  // in full and choose the best as the centre point for subsequent searches.
-  // The current implementation doesn't support scaling.
-  if (scale[frame_type].x_num == scale[frame_type].x_den &&
-      scale[frame_type].y_num == scale[frame_type].y_den)
-    mv_pred(cpi, x, yv12_mb[frame_type].y_buffer, yv12->y_stride,
-            frame_type, block_size);
-}
-
-static void model_rd_from_var_lapndz(int var, int n, int qstep,
-                                     int *rate, int *dist) {
-  // This function models the rate and distortion for a Laplacian
-  // source with given variance when quantized with a uniform quantizer
-  // with given stepsize. The closed form expressions are in:
-  // Hang and Chen, "Source Model for transform video coder and its
-  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
-  // Sys. for Video Tech., April 1997.
-  // The function is implemented as piecewise approximation to the
-  // exact computation.
-  // TODO(debargha): Implement the functions by interpolating from a
-  // look-up table
-  vp9_clear_system_state();
-  {
-    double D, R;
-    double s2 = (double) var / n;
-    double s = sqrt(s2);
-    double x = qstep / s;
-    if (x > 1.0) {
-      double y = exp(-x / 2);
-      double y2 = y * y;
-      D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
-      R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
-    } else {
-      double x2 = x * x;
-      D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
-      if (x > 0.125)
-        R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
-                 0.1626989668625);
-      else
-        R = -1.442252874826093 * log(x) + 1.944647760719664;
-    }
-    if (R < 0) {
-      *rate = 0;
-      *dist = var;
-    } else {
-      *rate = (n * R * 256 + 0.5);
-      *dist = (n * D * s2 + 0.5);
-    }
-  }
-  vp9_clear_system_state();
-}
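/* Editor's sketch (illustrative only, not part of the patch): a rough sanity
 * check of the model above. With per-sample variance s2 = var / n and
 * normalized step x = qstep / sqrt(s2), the small-x branch tracks the
 * classical high-rate results for a uniform quantizer: a rate of roughly
 * -log2(x) bits per sample (the 1.4422... coefficient is ~1/ln 2) and a
 * distortion on the order of x*x/12 per unit variance. The helper below is
 * hypothetical and only illustrates that normalization. */
#include <math.h>

static void laplacian_rd_high_rate(double var, int n, double qstep,
                                   double *bits_per_sample, double *mse) {
  const double s2 = var / n;              /* per-sample source variance */
  const double x = qstep / sqrt(s2);      /* normalized quantizer step  */
  *bits_per_sample = -log(x) / log(2.0);  /* ~ high-rate rate estimate  */
  *mse = (x * x / 12.0) * s2;             /* ~ high-rate distortion     */
}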
-
-static enum BlockSize y_to_uv_block_size(enum BlockSize bs) {
-  switch (bs) {
-    case BLOCK_64X64: return BLOCK_32X32;
-    case BLOCK_64X32: return BLOCK_32X16;
-    case BLOCK_32X64: return BLOCK_16X32;
-    case BLOCK_32X32: return BLOCK_16X16;
-    case BLOCK_32X16: return BLOCK_16X8;
-    case BLOCK_16X32: return BLOCK_8X16;
-    case BLOCK_16X16: return BLOCK_8X8;
-#if CONFIG_SB8X8
-    case BLOCK_16X8:  return BLOCK_8X4;
-    case BLOCK_8X16:  return BLOCK_4X8;
-    case BLOCK_8X8:   return BLOCK_4X4;
-#endif
-    default:
-      assert(0);
-      return -1;
-  }
-}
-
-static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) {
-  switch (bs) {
-    case BLOCK_SIZE_SB64X64: return BLOCK_64X64;
-    case BLOCK_SIZE_SB64X32: return BLOCK_64X32;
-    case BLOCK_SIZE_SB32X64: return BLOCK_32X64;
-    case BLOCK_SIZE_SB32X32: return BLOCK_32X32;
-    case BLOCK_SIZE_SB32X16: return BLOCK_32X16;
-    case BLOCK_SIZE_SB16X32: return BLOCK_16X32;
-    case BLOCK_SIZE_MB16X16: return BLOCK_16X16;
-#if CONFIG_SB8X8
-    case BLOCK_SIZE_SB16X8:  return BLOCK_16X8;
-    case BLOCK_SIZE_SB8X16:  return BLOCK_8X16;
-    case BLOCK_SIZE_SB8X8:   return BLOCK_8X8;
-#endif
-    default:
-      assert(0);
-      return -1;
-  }
-}
-
-static enum BlockSize get_block_size(int bw, int bh) {
-#if CONFIG_SB8X8
-  if (bw == 4 && bh == 4)
-    return BLOCK_4X4;
-
-  if (bw == 4 && bh == 8)
-    return BLOCK_4X8;
-
-  if (bw == 8 && bh == 4)
-    return BLOCK_8X4;
-
-  if (bw == 8 && bh == 8)
-    return BLOCK_8X8;
-
-  if (bw == 8 && bh == 16)
-    return BLOCK_8X16;
-
-  if (bw == 16 && bh == 8)
-    return BLOCK_16X8;
-#else
-  if (bw == 16 && bh == 8)
-    return BLOCK_16X8;
-
-  if (bw == 8 && bh == 16)
-    return BLOCK_8X16;
-
-  if (bw == 8 && bh == 8)
-    return BLOCK_8X8;
-
-  if (bw == 4 && bh == 4)
-    return BLOCK_4X4;
-#endif
-  if (bw == 16 && bh == 16)
-    return BLOCK_16X16;
-
-  if (bw == 32 && bh == 32)
-    return BLOCK_32X32;
-
-  if (bw == 32 && bh == 16)
-    return BLOCK_32X16;
-
-  if (bw == 16 && bh == 32)
-    return BLOCK_16X32;
-
-  if (bw == 64 && bh == 32)
-    return BLOCK_64X32;
-
-  if (bw == 32 && bh == 64)
-    return BLOCK_32X64;
-
-  if (bw == 64 && bh == 64)
-    return BLOCK_64X64;
-
-  assert(0);
-  return -1;
-}
-
-static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
-                            MACROBLOCK *x, MACROBLOCKD *xd,
-                            int *out_rate_sum, int *out_dist_sum) {
-  // Note our transform coeffs are 8 times an orthogonal transform.
-  // Hence quantizer step is also 8 times. To get effective quantizer
-  // we need to divide by 8 before sending to modeling function.
-  unsigned int sse, var;
-  int i, rate_sum = 0, dist_sum = 0;
-
-  for (i = 0; i < MAX_MB_PLANE; ++i) {
-    struct macroblock_plane *const p = &x->plane[i];
-    struct macroblockd_plane *const pd = &xd->plane[i];
-
-    const int bwl = b_width_log2(bsize) - pd->subsampling_x;
-    const int bhl = b_height_log2(bsize) - pd->subsampling_y;
-    const enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl);
-    int rate, dist;
-    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
-                             pd->dst.buf, pd->dst.stride, &sse);
-    model_rd_from_var_lapndz(var, 16 << (bwl + bhl),
-                             pd->dequant[1] >> 3, &rate, &dist);
-
-    rate_sum += rate;
-    dist_sum += dist;
-  }
-
-  *out_rate_sum = rate_sum;
-  *out_dist_sum = dist_sum;
-}
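/* Editor's sketch (illustrative only, not part of the patch): the per-plane
 * size computation above in isolation. For 4:2:0 content, subsampling_x and
 * subsampling_y are both 1, so e.g. a 64x64 luma partition models its chroma
 * planes as 32x32, consistent with y_to_uv_block_size(). The helper name is
 * hypothetical. */
static void plane_dims_from_logs(int b_width_log2_val, int b_height_log2_val,
                                 int subsampling_x, int subsampling_y,
                                 int *pw, int *ph) {
  *pw = 4 << (b_width_log2_val - subsampling_x);   /* plane width in pixels  */
  *ph = 4 << (b_height_log2_val - subsampling_y);  /* plane height in pixels */
}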
-
-static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
-                                 BLOCK_SIZE_TYPE bsize,
-                                 int mdcounts[4], int64_t txfm_cache[],
-                                 int *rate2, int *distortion, int *skippable,
-                                 int *compmode_cost,
-#if CONFIG_COMP_INTERINTRA_PRED
-                                 int *compmode_interintra_cost,
-#endif
-                                 int *rate_y, int *distortion_y,
-                                 int *rate_uv, int *distortion_uv,
-                                 int *mode_excluded, int *disable_skip,
-                                 int mode_index,
-                                 INTERPOLATIONFILTERTYPE *best_filter,
-                                 int_mv frame_mv[MB_MODE_COUNT]
-                                                [MAX_REF_FRAMES],
-                                 YV12_BUFFER_CONFIG *scaled_ref_frame,
-                                 int mi_row, int mi_col) {
-  const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
-  const enum BlockSize block_size = y_bsizet_to_block_size(bsize);
-  const enum BlockSize uv_block_size = y_to_uv_block_size(block_size);
-  VP9_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
-  const int is_comp_pred = (mbmi->second_ref_frame > 0);
-#if CONFIG_COMP_INTERINTRA_PRED
-  const int is_comp_interintra_pred = (mbmi->second_ref_frame == INTRA_FRAME);
-#endif
-  const int num_refs = is_comp_pred ? 2 : 1;
-  const int this_mode = mbmi->mode;
-  int i;
-  int refs[2] = { mbmi->ref_frame,
-                  (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };
-  int_mv cur_mv[2];
-  int_mv ref_mv[2];
-  int64_t this_rd = 0;
-  unsigned char tmp_ybuf[64 * 64];
-  unsigned char tmp_ubuf[32 * 32];
-  unsigned char tmp_vbuf[32 * 32];
-  int pred_exists = 0;
-  int interpolating_intpel_seen = 0;
-  int intpel_mv;
-  int64_t rd, best_rd = INT64_MAX;
-
-  switch (this_mode) {
-    case NEWMV:
-      ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
-      ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
-
-      if (is_comp_pred) {
-        if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV ||
-            frame_mv[NEWMV][refs[1]].as_int == INVALID_MV)
-          return INT64_MAX;
-        *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]],
-                                  &ref_mv[0],
-                                  x->nmvjointcost, x->mvcost, 96,
-                                  x->e_mbd.allow_high_precision_mv);
-        *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[1]],
-                                  &ref_mv[1],
-                                  x->nmvjointcost, x->mvcost, 96,
-                                  x->e_mbd.allow_high_precision_mv);
-      } else {
-        struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
-        int bestsme = INT_MAX;
-        int further_steps, step_param = cpi->sf.first_step;
-        int sadpb = x->sadperbit16;
-        int_mv mvp_full, tmp_mv;
-        int sr = 0;
-
-        int tmp_col_min = x->mv_col_min;
-        int tmp_col_max = x->mv_col_max;
-        int tmp_row_min = x->mv_row_min;
-        int tmp_row_max = x->mv_row_max;
-
-        if (scaled_ref_frame) {
-          int i;
-
-          // Swap out the reference frame for a version that's been scaled to
-          // match the resolution of the current frame, allowing the existing
-          // motion search code to be used without additional modifications.
-          for (i = 0; i < MAX_MB_PLANE; i++)
-            backup_yv12[i] = xd->plane[i].pre[0];
-
-          setup_pre_planes(xd, scaled_ref_frame, NULL, mi_row, mi_col,
-                           NULL, NULL);
-        }
-
-        vp9_clamp_mv_min_max(x, &ref_mv[0]);
-
-        sr = vp9_init_search_range(cpi->common.width, cpi->common.height);
-
-        // mvp_full.as_int = ref_mv[0].as_int;
-        mvp_full.as_int =
-         mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int;
-
-        mvp_full.as_mv.col >>= 3;
-        mvp_full.as_mv.row >>= 3;
-
-        // adjust search range according to sr from mv prediction
-        step_param = MAX(step_param, sr);
-
-        // Further step/diamond searches as necessary
-        further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
-        bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
-                                         sadpb, further_steps, 1,
-                                         &cpi->fn_ptr[block_size],
-                                         &ref_mv[0], &tmp_mv);
-
-        x->mv_col_min = tmp_col_min;
-        x->mv_col_max = tmp_col_max;
-        x->mv_row_min = tmp_row_min;
-        x->mv_row_max = tmp_row_max;
-
-        if (bestsme < INT_MAX) {
-          int dis; /* TODO: use dis in distortion calculation later. */
-          unsigned int sse;
-          cpi->find_fractional_mv_step(x, &tmp_mv,
-                                       &ref_mv[0],
-                                       x->errorperbit,
-                                       &cpi->fn_ptr[block_size],
-                                       x->nmvjointcost, x->mvcost,
-                                       &dis, &sse);
-        }
-        frame_mv[NEWMV][refs[0]].as_int =
-          xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
-
-        // Add the new motion vector cost to our rolling cost variable
-        *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
-                                  x->nmvjointcost, x->mvcost,
-                                  96, xd->allow_high_precision_mv);
-
-        // restore the predictor, if required
-        if (scaled_ref_frame) {
-          int i;
-
-          for (i = 0; i < MAX_MB_PLANE; i++)
-            xd->plane[i].pre[0] = backup_yv12[i];
-        }
-      }
-      break;
-    case NEARMV:
-    case NEARESTMV:
-    case ZEROMV:
-    default:
-      break;
-  }
-  for (i = 0; i < num_refs; ++i) {
-    cur_mv[i] = frame_mv[this_mode][refs[i]];
-    // Clip "next_nearest" so that it does not extend to far out of image
-    if (this_mode == NEWMV)
-      assert(!clamp_mv2(&cur_mv[i], xd));
-    else
-      clamp_mv2(&cur_mv[i], xd);
-
-    if (mv_check_bounds(x, &cur_mv[i]))
-      return INT64_MAX;
-    mbmi->mv[i].as_int = cur_mv[i].as_int;
-  }
-
-
-  /* We don't include the cost of the second reference here, because there
-   * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
-   * words if you present them in that order, the second one is always known
-   * if the first is known */
-  *compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP),
-                                is_comp_pred);
-  *rate2 += vp9_cost_mv_ref(cpi, this_mode,
-                            mbmi->mb_mode_context[mbmi->ref_frame]);
-#if CONFIG_COMP_INTERINTRA_PRED
-  if (!is_comp_pred) {
-    *compmode_interintra_cost = vp9_cost_bit(cm->fc.interintra_prob,
-                                             is_comp_interintra_pred);
-    if (is_comp_interintra_pred) {
-      *compmode_interintra_cost +=
-          x->mbmode_cost[xd->frame_type][mbmi->interintra_mode];
-#if SEPARATE_INTERINTRA_UV
-      *compmode_interintra_cost +=
-          x->intra_uv_mode_cost[xd->frame_type][mbmi->interintra_uv_mode];
-#endif
-    }
-  }
-#endif
-
-  pred_exists = 0;
-  interpolating_intpel_seen = 0;
-  // Are all MVs integer pel for Y and UV
-  intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
-              (mbmi->mv[0].as_mv.col & 15) == 0;
-  if (is_comp_pred)
-    intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
-                 (mbmi->mv[1].as_mv.col & 15) == 0;
-  // Search for best switchable filter by checking the variance of
-  // pred error irrespective of whether the filter will be used
-  if (1) {
-    int i, newbest;
-    int tmp_rate_sum = 0, tmp_dist_sum = 0;
-    for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
-      int rs = 0;
-      const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
-      const int is_intpel_interp = intpel_mv &&
-                                   vp9_is_interpolating_filter[filter];
-      mbmi->interp_filter = filter;
-      vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-
-      if (cm->mcomp_filter_type == SWITCHABLE) {
-        const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
-        const int m = vp9_switchable_interp_map[mbmi->interp_filter];
-        rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
-      }
-
-      if (interpolating_intpel_seen && is_intpel_interp) {
-        rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
-      } else {
-        int rate_sum = 0, dist_sum = 0;
-        vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
-        rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
-        if (!interpolating_intpel_seen && is_intpel_interp) {
-          tmp_rate_sum = rate_sum;
-          tmp_dist_sum = dist_sum;
-        }
-      }
-      newbest = i == 0 || rd < best_rd;
-      if (newbest) {
-        best_rd = rd;
-        *best_filter = mbmi->interp_filter;
-      }
-
-      if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
-          (cm->mcomp_filter_type != SWITCHABLE &&
-           cm->mcomp_filter_type == mbmi->interp_filter)) {
-        int i;
-        for (i = 0; i < MI_SIZE * bh; ++i)
-          vpx_memcpy(tmp_ybuf + i * MI_SIZE * bw,
-                     xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
-                     sizeof(unsigned char) * MI_SIZE * bw);
-        for (i = 0; i < MI_UV_SIZE * bh; ++i)
-          vpx_memcpy(tmp_ubuf + i * MI_UV_SIZE * bw,
-                     xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
-                     sizeof(unsigned char) * MI_UV_SIZE * bw);
-        for (i = 0; i < MI_UV_SIZE * bh; ++i)
-          vpx_memcpy(tmp_vbuf + i * MI_UV_SIZE * bw,
-                     xd->plane[2].dst.buf + i * xd->plane[2].dst.stride,
-                     sizeof(unsigned char) * MI_UV_SIZE * bw);
-        pred_exists = 1;
-      }
-      interpolating_intpel_seen |= is_intpel_interp;
-    }
-  }
-
-  // Set the appripriate filter
-  mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
-      cm->mcomp_filter_type : *best_filter;
-  vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-
-  if (pred_exists) {
-    // FIXME(rbultje): mb code still predicts into xd->predictor
-    for (i = 0; i < bh * MI_SIZE; ++i)
-      vpx_memcpy(xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
-                 tmp_ybuf + i * bw * MI_SIZE,
-                 sizeof(unsigned char) * bw * MI_SIZE);
-    for (i = 0; i < bh * MI_UV_SIZE; ++i)
-      vpx_memcpy(xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
-                 tmp_ubuf + i * bw * MI_UV_SIZE,
-                 sizeof(unsigned char) * bw * MI_UV_SIZE);
-    for (i = 0; i < bh * MI_UV_SIZE; ++i)
-      vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[2].dst.stride,
-                 tmp_vbuf + i * bw * MI_UV_SIZE,
-                 sizeof(unsigned char) * bw * MI_UV_SIZE);
-  } else {
-    // Handles the special case when a filter that is not in the
-    // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
-    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
-  }
-
-  if (cpi->common.mcomp_filter_type == SWITCHABLE) {
-    const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
-    const int m = vp9_switchable_interp_map[mbmi->interp_filter];
-    *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
-  }
-
-  if (cpi->active_map_enabled && x->active_ptr[0] == 0)
-    x->skip = 1;
-  else if (x->encode_breakout) {
-    unsigned int var, sse;
-    int threshold = (xd->plane[0].dequant[1]
-                     * xd->plane[0].dequant[1] >> 4);
-
-    if (threshold < x->encode_breakout)
-      threshold = x->encode_breakout;
-
-    if (bsize != BLOCK_SIZE_MB16X16) {
-      var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
-                                       x->plane[0].src.stride,
-                                       xd->plane[0].dst.buf,
-                                       xd->plane[0].dst.stride,
-                                       &sse);
-    } else {
-      var = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                              xd->plane[0].dst.buf, xd->plane[0].dst.stride,
-                              &sse);
-    }
-
-    if ((int)sse < threshold) {
-      unsigned int q2dc = xd->plane[0].dequant[0];
-      /* If there is no codeable 2nd order dc
-         or a very small uniform pixel change change */
-      if ((sse - var < q2dc * q2dc >> 4) ||
-          (sse / 2 > var && sse - var < 64)) {
-        // Check u and v to make sure skip is ok
-        int sse2;
-
-        if (bsize != BLOCK_SIZE_MB16X16) {
-          unsigned int sse2u, sse2v;
-          // FIXME(rbultje): mb predictors predict into xd->predictor
-          var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
-                                              x->plane[1].src.stride,
-                                              xd->plane[1].dst.buf,
-                                              xd->plane[1].dst.stride, &sse2u);
-          var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
-                                              x->plane[1].src.stride,
-                                              xd->plane[2].dst.buf,
-                                              xd->plane[1].dst.stride, &sse2v);
-          sse2 = sse2u + sse2v;
-        } else {
-          unsigned int sse2u, sse2v;
-          var = vp9_variance8x8(x->plane[1].src.buf, x->plane[1].src.stride,
-                                xd->plane[1].dst.buf, xd->plane[1].dst.stride,
-                                &sse2u);
-          var = vp9_variance8x8(x->plane[2].src.buf, x->plane[1].src.stride,
-                                xd->plane[2].dst.buf, xd->plane[1].dst.stride,
-                                &sse2v);
-          sse2 = sse2u + sse2v;
-        }
-
-        if (sse2 * 2 < threshold) {
-          x->skip = 1;
-          *distortion = sse + sse2;
-          *rate2 = 500;
-
-          /* for best_yrd calculation */
-          *rate_uv = 0;
-          *distortion_uv = sse2;
-
-          *disable_skip = 1;
-          this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
-        }
-      }
-    }
-  }
-
-  if (!x->skip) {
-    int skippable_y, skippable_uv;
-
-    // Y cost and distortion
-    super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
-                    bsize, txfm_cache);
-    *rate2 += *rate_y;
-    *distortion += *distortion_y;
-
-    super_block_uvrd(cm, x, rate_uv, distortion_uv,
-                     &skippable_uv, bsize);
-
-    *rate2 += *rate_uv;
-    *distortion += *distortion_uv;
-    *skippable = skippable_y && skippable_uv;
-  }
-
-  if (!(*mode_excluded)) {
-    if (is_comp_pred) {
-      *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
-    } else {
-      *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
-    }
-#if CONFIG_COMP_INTERINTRA_PRED
-    if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
-#endif
-  }
-
-  return this_rd;  // if 0, this will be re-calculated by caller
-}
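/* Editor's sketch (illustrative only, not part of the patch): the shape of
 * the switchable-filter loop inside handle_inter_mode() above, reduced to
 * its essentials: build a prediction per candidate filter, score it with a
 * cheap model, remember the winner, and cache its pixels so the final
 * prediction need not be rebuilt. The callback types, the lambda-based cost
 * (a stand-in for the encoder's RDCOST macro) and all names are
 * hypothetical. */
typedef struct { int rate; int dist; } filter_rd_est;

static int pick_switchable_filter(int n_filters, long long lambda, void *ctx,
                                  filter_rd_est (*predict_and_model)(int filter,
                                                                     void *ctx),
                                  void (*cache_prediction)(void *ctx)) {
  int f, best = 0;
  long long best_cost = -1;
  for (f = 0; f < n_filters; ++f) {
    const filter_rd_est e = predict_and_model(f, ctx);
    const long long cost = (long long)e.dist + lambda * e.rate;  /* D + lambda*R */
    if (best_cost < 0 || cost < best_cost) {
      best_cost = cost;
      best = f;
      cache_prediction(ctx);  /* keep pixels so the winner is not rebuilt */
    }
  }
  return best;
}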
-
-#if !CONFIG_SB8X8
-static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
-                               int mi_row, int mi_col,
-                               int *returnrate, int *returndistortion,
-                               int64_t *returnintra) {
-  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
-    VP9_ALT_FLAG };
-  VP9_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  union b_mode_info best_bmodes[16];
-  MB_MODE_INFO best_mbmode;
-  PARTITION_INFO best_partition;
-  int_mv best_ref_mv, second_best_ref_mv;
-  MB_PREDICTION_MODE this_mode;
-  MB_PREDICTION_MODE best_mode = DC_PRED;
-  MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
-  int i, best_mode_index = 0;
-  int mode8x8[4];
-  unsigned char segment_id = mbmi->segment_id;
-
-  int mode_index;
-  int mdcounts[4];
-  int rate, distortion;
-  int rate2, distortion2;
-  int64_t best_txfm_rd[NB_TXFM_MODES];
-  int64_t best_txfm_diff[NB_TXFM_MODES];
-  int64_t best_pred_diff[NB_PREDICTION_TYPES];
-  int64_t best_pred_rd[NB_PREDICTION_TYPES];
-  int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX;
-#if CONFIG_COMP_INTERINTRA_PRED
-  int is_best_interintra = 0;
-  int64_t best_intra16_rd = INT64_MAX;
-  int best_intra16_mode = DC_PRED;
-#if SEPARATE_INTERINTRA_UV
-  int best_intra16_uv_mode = DC_PRED;
-#endif
-#endif
-  int64_t best_overall_rd = INT64_MAX;
-  INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
-  INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
-  int uv_intra_rate[2], uv_intra_distortion[2], uv_intra_rate_tokenonly[2];
-  int uv_intra_skippable[2];
-  MB_PREDICTION_MODE uv_intra_mode[2];
-  int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
-  int distortion_uv = INT_MAX;
-  int64_t best_yrd = INT64_MAX;
-
-  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
-  int frame_mdcounts[4][4];
-  YV12_BUFFER_CONFIG yv12_mb[4];
-
-  unsigned int ref_costs[MAX_REF_FRAMES];
-  int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];
-
-  int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
-                                             cpi->common.y_dc_delta_q);
-  int64_t mode_distortions[MB_MODE_COUNT] = {-1};
-  int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
-  int ref_frame;
-
-  struct scale_factors scale_factor[4];
-
-  vpx_memset(mode8x8, 0, sizeof(mode8x8));
-  vpx_memset(&frame_mv, 0, sizeof(frame_mv));
-  vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
-  vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
-  vpx_memset(&x->mb_context[xd->sb_index][xd->mb_index], 0,
-             sizeof(PICK_MODE_CONTEXT));
-
-  x->mb_context[xd->sb_index][xd->mb_index].frames_with_high_error = 0;
-  x->mb_context[xd->sb_index][xd->mb_index].modes_with_high_error = 0;
-
-  for (i = 0; i < MAX_REF_FRAMES; i++)
-    frame_mv[NEWMV][i].as_int = INVALID_MV;
-  for (i = 0; i < NB_PREDICTION_TYPES; ++i)
-    best_pred_rd[i] = INT64_MAX;
-  for (i = 0; i < NB_TXFM_MODES; i++)
-    best_txfm_rd[i] = INT64_MAX;
-
-  for (i = 0; i < NB_PARTITIONINGS; i++) {
-    int j, k;
-
-    for (j = 0; j < 16; j++)
-      for (k = 0; k < MAX_REF_FRAMES - 1; k++)
-        seg_mvs[i][j][k].as_int = INVALID_MV;
-  }
-
-  if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
-    setup_buffer_inter(cpi, x, cpi->lst_fb_idx,
-                       LAST_FRAME, BLOCK_16X16, mi_row, mi_col,
-                       frame_mv[NEARESTMV], frame_mv[NEARMV],
-                       frame_mdcounts, yv12_mb, scale_factor);
-  }
-
-  if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
-    setup_buffer_inter(cpi, x, cpi->gld_fb_idx,
-                       GOLDEN_FRAME, BLOCK_16X16, mi_row, mi_col,
-                       frame_mv[NEARESTMV], frame_mv[NEARMV],
-                       frame_mdcounts, yv12_mb, scale_factor);
-  }
-
-  if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
-    setup_buffer_inter(cpi, x, cpi->alt_fb_idx,
-                       ALTREF_FRAME, BLOCK_16X16, mi_row, mi_col,
-                       frame_mv[NEARESTMV], frame_mv[NEARMV],
-                       frame_mdcounts, yv12_mb, scale_factor);
-  }
-
-  *returnintra = INT64_MAX;
-
-  mbmi->ref_frame = INTRA_FRAME;
-
-  /* Initialize zbin mode boost for uv costing */
-  cpi->zbin_mode_boost = 0;
-  vp9_update_zbin_extra(cpi, x);
-
-  xd->mode_info_context->mbmi.mode = DC_PRED;
-
-  for (i = 0; i <= TX_8X8; i++) {
-    mbmi->txfm_size = i;
-    rd_pick_intra_sbuv_mode(cpi, x, &uv_intra_rate[i],
-                            &uv_intra_rate_tokenonly[i],
-                            &uv_intra_distortion[i],
-                            &uv_intra_skippable[i],
-                            BLOCK_SIZE_MB16X16);
-    uv_intra_mode[i] = mbmi->uv_mode;
-  }
-
-  // Get estimates of reference frame costs for each reference frame
-  // that depend on the current prediction etc.
-  estimate_ref_frame_costs(cpi, segment_id, ref_costs);
-
-  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
-    int64_t this_rd = INT64_MAX;
-    int disable_skip = 0, skippable = 0;
-    int other_cost = 0;
-    int compmode_cost = 0;
-#if CONFIG_COMP_INTERINTRA_PRED
-    int compmode_interintra_cost = 0;
-#endif
-    int mode_excluded = 0;
-    int64_t txfm_cache[NB_TXFM_MODES] = { 0 };
-    YV12_BUFFER_CONFIG *scaled_ref_frame;
-
-    // These variables hold are rolling total cost and distortion for this mode
-    rate2 = 0;
-    distortion2 = 0;
-    rate_y = 0;
-    rate_uv = 0;
-
-    x->skip = 0;
-
-    this_mode = vp9_mode_order[mode_index].mode;
-    mbmi->mode = this_mode;
-    mbmi->uv_mode = DC_PRED;
-    mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame;
-    mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
-
-    mbmi->interp_filter = cm->mcomp_filter_type;
-
-    set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
-                      scale_factor);
-
-    vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-
-    // Test best rd so far against threshold for trying this mode.
-    if (best_rd <= cpi->rd_threshes[mode_index])
-      continue;
-
-    // Ensure that the references used by this mode are available.
-    if (mbmi->ref_frame &&
-        !(cpi->ref_frame_flags & flag_list[mbmi->ref_frame]))
-      continue;
-
-    if (mbmi->second_ref_frame > 0 &&
-        !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))
-      continue;
-
-    // only scale on zeromv.
-    if (mbmi->ref_frame > 0 &&
-          (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
-           yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
-        this_mode != ZEROMV)
-      continue;
-
-    if (mbmi->second_ref_frame > 0 &&
-          (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
-           yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
-        this_mode != ZEROMV)
-      continue;
-
-    // current coding mode under rate-distortion optimization test loop
-#if CONFIG_COMP_INTERINTRA_PRED
-    mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
-    mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
-#endif
-
-    // If the segment reference frame feature is enabled....
-    // then do nothing if the current ref frame is not allowed..
-    if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
-        !vp9_check_segref(xd, segment_id, mbmi->ref_frame)) {
-      continue;
-    // If the segment skip feature is enabled....
-    // then do nothing if the current mode is not allowed..
-    } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
-               (this_mode != ZEROMV)) {
-      continue;
-    // Disable this drop out case if  the ref frame segment
-    // level feature is enabled for this segment. This is to
-    // prevent the possibility that the we end up unable to pick any mode.
-    } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
-      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame overlay,
-      // unless ARNR filtering is enabled in which case we want
-      // an unfiltered alternative
-      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
-        if (this_mode != ZEROMV ||
-            mbmi->ref_frame != ALTREF_FRAME) {
-          continue;
-        }
-      }
-    }
-
-    /* everything but intra */
-    scaled_ref_frame = NULL;
-    if (mbmi->ref_frame) {
-      int ref = mbmi->ref_frame;
-      int fb;
-
-      best_ref_mv = mbmi->ref_mvs[ref][0];
-      vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts));
-
-      if (mbmi->ref_frame == LAST_FRAME) {
-        fb = cpi->lst_fb_idx;
-      } else if (mbmi->ref_frame == GOLDEN_FRAME) {
-        fb = cpi->gld_fb_idx;
-      } else {
-        fb = cpi->alt_fb_idx;
-      }
-
-      if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
-        scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
-    }
-
-    if (mbmi->second_ref_frame > 0) {
-      int ref = mbmi->second_ref_frame;
-
-      second_best_ref_mv = mbmi->ref_mvs[ref][0];
-    }
-
-    // TODO(jkoleszar) scaling/translation handled during creation of yv12_mb
-    // currently.
-    setup_pre_planes(xd, &yv12_mb[mbmi->ref_frame],
-        mbmi->second_ref_frame > 0 ? &yv12_mb[mbmi->second_ref_frame] : NULL,
-        0, 0, NULL, NULL);
-
-    // Experimental code. Special case for gf and arf zeromv modes.
-    // Increase zbin size to suppress noise
-    if (cpi->zbin_mode_boost_enabled) {
-      if (vp9_mode_order[mode_index].ref_frame == INTRA_FRAME)
-        cpi->zbin_mode_boost = 0;
-      else {
-        if (vp9_mode_order[mode_index].mode == ZEROMV) {
-          if (vp9_mode_order[mode_index].ref_frame != LAST_FRAME)
-            cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
-          else
-            cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
-        } else if (vp9_mode_order[mode_index].mode == SPLITMV)
-          cpi->zbin_mode_boost = 0;
-        else
-          cpi->zbin_mode_boost = MV_ZBIN_BOOST;
-      }
-
-      vp9_update_zbin_extra(cpi, x);
-    }
-
-    // Intra
-    if (!mbmi->ref_frame) {
-      switch (this_mode) {
-        default:
-        case V_PRED:
-        case H_PRED:
-        case D45_PRED:
-        case D135_PRED:
-        case D117_PRED:
-        case D153_PRED:
-        case D27_PRED:
-        case D63_PRED:
-          rate2 += intra_cost_penalty;
-        case DC_PRED:
-        case TM_PRED:
-          mbmi->ref_frame = INTRA_FRAME;
-          // FIXME compound intra prediction
-          vp9_build_intra_predictors_sby_s(&x->e_mbd, BLOCK_SIZE_MB16X16);
-          // vp9_build_intra_predictors_mby(&x->e_mbd);
-          super_block_yrd(cpi, x, &rate_y, &distortion, &skippable,
-                          BLOCK_SIZE_MB16X16, txfm_cache);
-          rate2 += rate_y;
-          distortion2 += distortion;
-          rate2 += x->mbmode_cost[xd->frame_type][mbmi->mode];
-
-          rate2 += uv_intra_rate[mbmi->txfm_size != TX_4X4];
-          rate_uv = uv_intra_rate_tokenonly[mbmi->txfm_size != TX_4X4];
-          distortion2 += uv_intra_distortion[mbmi->txfm_size != TX_4X4];
-          distortion_uv = uv_intra_distortion[mbmi->txfm_size != TX_4X4];
-          skippable = skippable &&
-                      uv_intra_skippable[mbmi->txfm_size != TX_4X4];
-          break;
-        case I4X4_PRED: {
-          int64_t tmp_rd;
-
-          // Note the rate value returned here includes the cost of coding
-          // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED];
-          mbmi->txfm_size = TX_4X4;
-          tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
-                                             &distortion, best_yrd);
-          rate2 += rate;
-          rate2 += intra_cost_penalty;
-          distortion2 += distortion;
-
-          if (tmp_rd < best_yrd) {
-            rate2 += uv_intra_rate[TX_4X4];
-            rate_uv = uv_intra_rate_tokenonly[TX_4X4];
-            distortion2 += uv_intra_distortion[TX_4X4];
-            distortion_uv = uv_intra_distortion[TX_4X4];
-          } else {
-            this_rd = INT64_MAX;
-            disable_skip = 1;
-          }
-        }
-        break;
-        case I8X8_PRED: {
-          int64_t tmp_rd;
-
-          tmp_rd = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate, &rate_y,
-                                                      &distortion, mode8x8,
-                                                      best_yrd, txfm_cache);
-          rate2 += rate;
-          rate2 += intra_cost_penalty;
-          distortion2 += distortion;
-
-          /* TODO: uv rate maybe over-estimated here since there is UV intra
-                   mode coded in I8X8_PRED prediction */
-          if (tmp_rd < best_yrd) {
-            rate2 += uv_intra_rate[TX_4X4];
-            rate_uv = uv_intra_rate_tokenonly[TX_4X4];
-            distortion2 += uv_intra_distortion[TX_4X4];
-            distortion_uv = uv_intra_distortion[TX_4X4];
-          } else {
-            this_rd = INT64_MAX;
-            disable_skip = 1;
-          }
-        }
-        break;
-      }
-    }
-    // Split MV. The code is very different from the other inter modes so
-    // special case it.
-    else if (this_mode == SPLITMV) {
-      const int is_comp_pred = mbmi->second_ref_frame > 0;
-      int64_t this_rd_thresh;
-      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
-      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
-      int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
-      int switchable_filter_index;
-      int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL;
-      union b_mode_info tmp_best_bmodes[16];
-      MB_MODE_INFO tmp_best_mbmode;
-      PARTITION_INFO tmp_best_partition;
-      int pred_exists = 0;
-
-      this_rd_thresh =
-          (mbmi->ref_frame == LAST_FRAME) ?
-          cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
-      this_rd_thresh =
-          (mbmi->ref_frame == GOLDEN_FRAME) ?
-          cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
-      xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-
-      for (switchable_filter_index = 0;
-           switchable_filter_index < VP9_SWITCHABLE_FILTERS;
-           ++switchable_filter_index) {
-        int newbest;
-        mbmi->interp_filter =
-            vp9_switchable_interp[switchable_filter_index];
-        vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-
-        tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
-                                             second_ref, best_yrd, mdcounts,
-                                             &rate, &rate_y, &distortion,
-                                             &skippable,
-                                             (int)this_rd_thresh, seg_mvs,
-                                             txfm_cache);
-        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
-          int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
-                   [vp9_get_pred_context(&cpi->common, xd,
-                                         PRED_SWITCHABLE_INTERP)]
-                   [vp9_switchable_interp_map[mbmi->interp_filter]];
-          tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
-        }
-        newbest = (tmp_rd < tmp_best_rd);
-        if (newbest) {
-          tmp_best_filter = mbmi->interp_filter;
-          tmp_best_rd = tmp_rd;
-        }
-        if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
-            (mbmi->interp_filter == cm->mcomp_filter_type &&
-             cm->mcomp_filter_type != SWITCHABLE)) {
-          tmp_best_rdu = tmp_rd;
-          tmp_best_rate = rate;
-          tmp_best_ratey = rate_y;
-          tmp_best_distortion = distortion;
-          tmp_best_skippable = skippable;
-          vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO));
-          vpx_memcpy(&tmp_best_partition, x->partition_info,
-                     sizeof(PARTITION_INFO));
-          for (i = 0; i < 16; i++) {
-            tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
-          }
-          pred_exists = 1;
-        }
-      }  // switchable_filter_index loop
-
-      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
-                             tmp_best_filter : cm->mcomp_filter_type);
-      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-      if (!pred_exists) {
-        // Handles the special case when a filter that is not in the
-        // switchable list (bilinear, 6-tap) is indicated at the frame level
-        tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
-                                             second_ref, best_yrd, mdcounts,
-                                             &rate, &rate_y, &distortion,
-                                             &skippable,
-                                             (int)this_rd_thresh, seg_mvs,
-                                             txfm_cache);
-      } else {
-        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
-          int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
-                   [vp9_get_pred_context(&cpi->common, xd,
-                                         PRED_SWITCHABLE_INTERP)]
-                   [vp9_switchable_interp_map[mbmi->interp_filter]];
-          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
-        }
-        tmp_rd = tmp_best_rdu;
-        rate = tmp_best_rate;
-        rate_y = tmp_best_ratey;
-        distortion = tmp_best_distortion;
-        skippable = tmp_best_skippable;
-        vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO));
-        vpx_memcpy(x->partition_info, &tmp_best_partition,
-                   sizeof(PARTITION_INFO));
-        for (i = 0; i < 16; i++) {
-          xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
-        }
-      }
-
-      rate2 += rate;
-      distortion2 += distortion;
-
-      if (cpi->common.mcomp_filter_type == SWITCHABLE)
-        rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
-            [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
-            [vp9_switchable_interp_map[mbmi->interp_filter]];
-
-      // If even the 'Y' rd value of split is higher than best so far
-      // then dont bother looking at UV
-      if (tmp_rd < best_yrd) {
-        int uv_skippable;
-
-        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
-                                        BLOCK_SIZE_MB16X16);
-
-        vp9_subtract_sbuv(x, BLOCK_SIZE_MB16X16);
-
-        super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
-                                  &uv_skippable, BLOCK_SIZE_MB16X16, TX_4X4);
-        rate2 += rate_uv;
-        distortion2 += distortion_uv;
-        skippable = skippable && uv_skippable;
+      if (is_comp_pred) {
+        if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV ||
+            frame_mv[NEWMV][refs[1]].as_int == INVALID_MV)
+          return INT64_MAX;
+        *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]],
+                                  &ref_mv[0],
+                                  x->nmvjointcost, x->mvcost, 96,
+                                  x->e_mbd.allow_high_precision_mv);
+        *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[1]],
+                                  &ref_mv[1],
+                                  x->nmvjointcost, x->mvcost, 96,
+                                  x->e_mbd.allow_high_precision_mv);
       } else {
-        this_rd = INT64_MAX;
-        disable_skip = 1;
-      }
-
-      if (!mode_excluded) {
-        if (is_comp_pred)
-          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
-        else
-          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
-      }
-
-      compmode_cost =
-        vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
-      mbmi->mode = this_mode;
-    }
-    else {
-#if CONFIG_COMP_INTERINTRA_PRED
-      if (mbmi->second_ref_frame == INTRA_FRAME) {
-        if (best_intra16_mode == DC_PRED - 1) continue;
-        mbmi->interintra_mode = best_intra16_mode;
-#if SEPARATE_INTERINTRA_UV
-        mbmi->interintra_uv_mode = best_intra16_uv_mode;
-#else
-        mbmi->interintra_uv_mode = best_intra16_mode;
-#endif
-      }
-#endif
-      this_rd = handle_inter_mode(cpi, x, BLOCK_SIZE_MB16X16,
-                                  mdcounts, txfm_cache,
-                                  &rate2, &distortion2, &skippable,
-                                  &compmode_cost,
-#if CONFIG_COMP_INTERINTRA_PRED
-                                  &compmode_interintra_cost,
-#endif
-                                  &rate_y, &distortion,
-                                  &rate_uv, &distortion_uv,
-                                  &mode_excluded, &disable_skip,
-                                  mode_index, &tmp_best_filter, frame_mv,
-                                  scaled_ref_frame, mi_row, mi_col);
-      if (this_rd == INT64_MAX)
-        continue;
-    }
-
-#if CONFIG_COMP_INTERINTRA_PRED
-    if (cpi->common.use_interintra)
-      rate2 += compmode_interintra_cost;
-#endif
-
-    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
-      rate2 += compmode_cost;
-
-    // Estimate the reference frame signaling cost and add it
-    // to the rolling cost variable.
-    rate2 += ref_costs[mbmi->ref_frame];
-
-    if (!disable_skip) {
-      // Test for the condition where skip block will be activated
-      // because there are no non zero coefficients and make any
-      // necessary adjustment for rate. Ignore if skip is coded at
-      // segment level as the cost wont have been added in.
-      int mb_skip_allowed;
-
-      // Is Mb level skip allowed (i.e. not coded at segment level).
-      mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
-
-      if (skippable) {
-        mbmi->mb_skip_coeff = 1;
+        struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+        int bestsme = INT_MAX;
+        int further_steps, step_param = cpi->sf.first_step;
+        int sadpb = x->sadperbit16;
+        int_mv mvp_full, tmp_mv;
+        int sr = 0;
 
-        // Back out the coefficient coding costs
-        rate2 -= (rate_y + rate_uv);
-        // for best_yrd calculation
-        rate_uv = 0;
+        int tmp_col_min = x->mv_col_min;
+        int tmp_col_max = x->mv_col_max;
+        int tmp_row_min = x->mv_row_min;
+        int tmp_row_max = x->mv_row_max;
 
-        if (mb_skip_allowed) {
-          int prob_skip_cost;
+        if (scaled_ref_frame) {
+          int i;
 
-          // Cost the skip mb case
-          vp9_prob skip_prob =
-            vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP);
+          // Swap out the reference frame for a version that's been scaled to
+          // match the resolution of the current frame, allowing the existing
+          // motion search code to be used without additional modifications.
+          for (i = 0; i < MAX_MB_PLANE; i++)
+            backup_yv12[i] = xd->plane[i].pre[0];
 
-          if (skip_prob) {
-            prob_skip_cost = vp9_cost_bit(skip_prob, 1);
-            rate2 += prob_skip_cost;
-            other_cost += prob_skip_cost;
-          }
-        }
-      } else {
-        // Add in the cost of the no skip flag.
-        mbmi->mb_skip_coeff = 0;
-        if (mb_skip_allowed) {
-          int prob_skip_cost = vp9_cost_bit(
-                 vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0);
-          rate2 += prob_skip_cost;
-          other_cost += prob_skip_cost;
+          setup_pre_planes(xd, scaled_ref_frame, NULL, mi_row, mi_col,
+                           NULL, NULL);
         }
-      }
 
-      // Calculate the final RD estimate for this mode.
-      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-    }
+        vp9_clamp_mv_min_max(x, &ref_mv[0]);
 
-    // Keep record of best intra distortion
-    if ((mbmi->ref_frame == INTRA_FRAME) &&
-        (this_rd < best_intra_rd)) {
-      best_intra_rd = this_rd;
-      *returnintra = distortion2;
-    }
-#if CONFIG_COMP_INTERINTRA_PRED
-    if ((mbmi->ref_frame == INTRA_FRAME) &&
-        (this_mode <= TM_PRED) &&
-        (this_rd < best_intra16_rd)) {
-      best_intra16_rd = this_rd;
-      best_intra16_mode = this_mode;
-#if SEPARATE_INTERINTRA_UV
-      best_intra16_uv_mode = uv_intra_mode[mbmi->txfm_size != TX_4X4];
-#endif
-    }
-#endif
+        sr = vp9_init_search_range(cpi->common.width, cpi->common.height);
 
-    if (!disable_skip && mbmi->ref_frame == INTRA_FRAME)
-      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
-        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
+        // mvp_full.as_int = ref_mv[0].as_int;
+        mvp_full.as_int =
+         mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int;
 
-    if (this_rd < best_overall_rd) {
-      best_overall_rd = this_rd;
-      best_filter = tmp_best_filter;
-      best_mode = this_mode;
-#if CONFIG_COMP_INTERINTRA_PRED
-      is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
-#endif
-    }
+        mvp_full.as_mv.col >>= 3;
+        mvp_full.as_mv.row >>= 3;
 
-    // Store the respective mode distortions for later use.
-    // Store the respective mode distortions for later use.
-    if (mode_distortions[this_mode] == -1
-        || distortion2 < mode_distortions[this_mode]) {
-      mode_distortions[this_mode] = distortion2;
-    }
-    if (frame_distortions[mbmi->ref_frame] == -1 ||
-        distortion2 < frame_distortions[mbmi->ref_frame]) {
-       frame_distortions[mbmi->ref_frame] = distortion2;
-    }
+        // adjust search range according to sr from mv prediction
+        step_param = MAX(step_param, sr);
 
-    // Did this mode help.. i.e. is it the new best mode
-    if (this_rd < best_rd || x->skip) {
-      if (!mode_excluded) {
-        /*
-        if (mbmi->second_ref_frame == INTRA_FRAME) {
-          printf("rd %d best %d bestintra16 %d\n", this_rd, best_rd, best_intra16_rd);
-        }
-        */
-        // Note index of best mode so far
-        best_mode_index = mode_index;
+        // Further step/diamond searches as necessary
+        further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
 
-        if (this_mode <= I4X4_PRED) {
-          if (mbmi->txfm_size != TX_4X4
-              && this_mode != I4X4_PRED
-              && this_mode != I8X8_PRED)
-            mbmi->uv_mode = uv_intra_mode[TX_8X8];
-          else
-            mbmi->uv_mode = uv_intra_mode[TX_4X4];
-          /* required for left and above block mv */
-          mbmi->mv[0].as_int = 0;
-        }
+        bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
+                                         sadpb, further_steps, 1,
+                                         &cpi->fn_ptr[block_size],
+                                         &ref_mv[0], &tmp_mv);
+
+        x->mv_col_min = tmp_col_min;
+        x->mv_col_max = tmp_col_max;
+        x->mv_row_min = tmp_row_min;
+        x->mv_row_max = tmp_row_max;
 
-        other_cost += ref_costs[mbmi->ref_frame];
+        if (bestsme < INT_MAX) {
+          int dis; /* TODO: use dis in distortion calculation later. */
+          unsigned int sse;
+          cpi->find_fractional_mv_step(x, &tmp_mv,
+                                       &ref_mv[0],
+                                       x->errorperbit,
+                                       &cpi->fn_ptr[block_size],
+                                       x->nmvjointcost, x->mvcost,
+                                       &dis, &sse);
+        }
+        frame_mv[NEWMV][refs[0]].as_int =
+          xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
 
-        /* Calculate the final y RD estimate for this mode */
-        best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost),
-                          (distortion2 - distortion_uv));
+        // Add the new motion vector cost to our rolling cost variable
+        *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
+                                  x->nmvjointcost, x->mvcost,
+                                  96, xd->allow_high_precision_mv);
 
-        *returnrate = rate2;
-        *returndistortion = distortion2;
-        best_rd = this_rd;
-        vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
-        vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
+        // restore the predictor, if required
+        if (scaled_ref_frame) {
+          int i;
 
-        if ((this_mode == I4X4_PRED)
-            || (this_mode == I8X8_PRED)
-            || (this_mode == SPLITMV))
-          for (i = 0; i < 16; i++) {
-            best_bmodes[i] = xd->mode_info_context->bmi[i];
-          }
+          for (i = 0; i < MAX_MB_PLANE; i++)
+            xd->plane[i].pre[0] = backup_yv12[i];
+        }
       }
+      break;
+    case NEARMV:
+    case NEARESTMV:
+    case ZEROMV:
+    default:
+      break;
+  }
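  /* Editor's note (summary of the single-reference NEWMV branch above, added
   * for readability): when the reference frame is scaled, the pre[] planes
   * are temporarily swapped for a pre-scaled copy so the search code can run
   * unmodified; the MV range is then clamped, the predicted MV taken from
   * ref_mvs[]/mv_best_ref_index is converted to full-pel as the starting
   * point, the initial step size is raised to at least the computed search
   * range, a diamond full-pixel search runs, the result is refined to sub-pel
   * precision, its signalling cost is added to *rate2, and the original
   * pre[] planes are restored. */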
+  for (i = 0; i < num_refs; ++i) {
+    cur_mv[i] = frame_mv[this_mode][refs[i]];
+    // Clip "next_nearest" so that it does not extend to far out of image
+    if (this_mode == NEWMV)
+      assert(!clamp_mv2(&cur_mv[i], xd));
+    else
+      clamp_mv2(&cur_mv[i], xd);
 
-      // Testing this mode gave rise to an improvement in best error score.
-      // Lower threshold a bit for next time
-      cpi->rd_thresh_mult[mode_index] =
-          (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
-          cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
-      cpi->rd_threshes[mode_index] =
-          (cpi->rd_baseline_thresh[mode_index] >> 7) *
-          cpi->rd_thresh_mult[mode_index];
-    } else {
-      // If the mode did not help improve the best error case then raise the
-      // threshold for testing that mode next time around.
-      cpi->rd_thresh_mult[mode_index] += 4;
+    if (mv_check_bounds(x, &cur_mv[i]))
+      return INT64_MAX;
+    mbmi->mv[i].as_int = cur_mv[i].as_int;
+  }
 
-      if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
-        cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
 
-      cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7)
-          * cpi->rd_thresh_mult[mode_index];
+  /* We don't include the cost of the second reference here, because there
+   * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
+   * words if you present them in that order, the second one is always known
+   * if the first is known */
+  *compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP),
+                                is_comp_pred);
+  *rate2 += vp9_cost_mv_ref(cpi, this_mode,
+                            mbmi->mb_mode_context[mbmi->ref_frame]);
+#if CONFIG_COMP_INTERINTRA_PRED
+  if (!is_comp_pred) {
+    *compmode_interintra_cost = vp9_cost_bit(cm->fc.interintra_prob,
+                                             is_comp_interintra_pred);
+    if (is_comp_interintra_pred) {
+      *compmode_interintra_cost +=
+          x->mbmode_cost[xd->frame_type][mbmi->interintra_mode];
+#if SEPARATE_INTERINTRA_UV
+      *compmode_interintra_cost +=
+          x->intra_uv_mode_cost[xd->frame_type][mbmi->interintra_uv_mode];
+#endif
     }
+  }
+#endif
 
-    /* keep record of best compound/single-only prediction */
-    if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) {
-      int64_t single_rd, hybrid_rd;
-      int single_rate, hybrid_rate;
+  pred_exists = 0;
+  interpolating_intpel_seen = 0;
+  // Are all MVs integer pel for Y and UV
+  intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
+              (mbmi->mv[0].as_mv.col & 15) == 0;
+  if (is_comp_pred)
+    intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
+                 (mbmi->mv[1].as_mv.col & 15) == 0;
+  // Search for best switchable filter by checking the variance of
+  // pred error irrespective of whether the filter will be used
+  if (1) {
+    int i, newbest;
+    int tmp_rate_sum = 0, tmp_dist_sum = 0;
+    for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+      int rs = 0;
+      const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
+      const int is_intpel_interp = intpel_mv &&
+                                   vp9_is_interpolating_filter[filter];
+      mbmi->interp_filter = filter;
+      vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
 
-      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
-        single_rate = rate2 - compmode_cost;
-        hybrid_rate = rate2;
-      } else {
-        single_rate = rate2;
-        hybrid_rate = rate2 + compmode_cost;
+      if (cm->mcomp_filter_type == SWITCHABLE) {
+        const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+        const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+        rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
       }
 
-      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
-      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+      if (interpolating_intpel_seen && is_intpel_interp) {
+        rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
+      } else {
+        int rate_sum = 0, dist_sum = 0;
+        vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
+        rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
+        if (!interpolating_intpel_seen && is_intpel_interp) {
+          tmp_rate_sum = rate_sum;
+          tmp_dist_sum = dist_sum;
+        }
+      }
+      newbest = i == 0 || rd < best_rd;
 
-      if (mbmi->second_ref_frame <= INTRA_FRAME &&
-          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
-        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
-      } else if (mbmi->second_ref_frame > INTRA_FRAME &&
-                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
-        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
+      if (newbest) {
+        best_rd = rd;
+        *best_filter = mbmi->interp_filter;
       }
-      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
-        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
-    }
 
-    /* keep record of best txfm size */
-    if (!mode_excluded && this_rd != INT64_MAX) {
-      for (i = 0; i < NB_TXFM_MODES; i++) {
-        int64_t adj_rd;
-        if (this_mode != I4X4_PRED) {
-          const int64_t txfm_mode_diff =
-              txfm_cache[i] - txfm_cache[cm->txfm_mode];
-          adj_rd = this_rd + txfm_mode_diff;
-        } else {
-          adj_rd = this_rd;
-        }
-        if (adj_rd < best_txfm_rd[i])
-          best_txfm_rd[i] = adj_rd;
+      if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+          (cm->mcomp_filter_type != SWITCHABLE &&
+           cm->mcomp_filter_type == mbmi->interp_filter)) {
+        int i;
+        for (i = 0; i < MI_SIZE * bh; ++i)
+          vpx_memcpy(tmp_ybuf + i * MI_SIZE * bw,
+                     xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
+                     sizeof(unsigned char) * MI_SIZE * bw);
+        for (i = 0; i < MI_UV_SIZE * bh; ++i)
+          vpx_memcpy(tmp_ubuf + i * MI_UV_SIZE * bw,
+                     xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
+                     sizeof(unsigned char) * MI_UV_SIZE * bw);
+        for (i = 0; i < MI_UV_SIZE * bh; ++i)
+          vpx_memcpy(tmp_vbuf + i * MI_UV_SIZE * bw,
+                     xd->plane[2].dst.buf + i * xd->plane[2].dst.stride,
+                     sizeof(unsigned char) * MI_UV_SIZE * bw);
+        pred_exists = 1;
       }
+      interpolating_intpel_seen |= is_intpel_interp;
     }
-
-    if (x->skip && !mode_excluded)
-      break;
   }
 
-  assert((cm->mcomp_filter_type == SWITCHABLE) ||
-         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
-         (best_mbmode.mode <= I4X4_PRED));
-
-#if CONFIG_COMP_INTERINTRA_PRED
-  ++cpi->interintra_select_count[is_best_interintra];
-#endif
+  // Set the appropriate filter
+  mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
+                             cm->mcomp_filter_type : *best_filter;
+  vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
 
-  // Accumulate filter usage stats
-  // TODO(agrange): Use RD criteria to select interpolation filter mode.
-  if (is_inter_mode(best_mode))
-    ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];
 
-  // Reduce the activation RD thresholds for the best choice mode
-  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
-      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
-    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
+  if (pred_exists) {
+    // FIXME(rbultje): mb code still predicts into xd->predictor
+    for (i = 0; i < bh * MI_SIZE; ++i)
+      vpx_memcpy(xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
+                 tmp_ybuf + i * bw * MI_SIZE,
+                 sizeof(unsigned char) * bw * MI_SIZE);
+    for (i = 0; i < bh * MI_UV_SIZE; ++i)
+      vpx_memcpy(xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
+                 tmp_ubuf + i * bw * MI_UV_SIZE,
+                 sizeof(unsigned char) * bw * MI_UV_SIZE);
+    for (i = 0; i < bh * MI_UV_SIZE; ++i)
+      vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[2].dst.stride,
+                 tmp_vbuf + i * bw * MI_UV_SIZE,
+                 sizeof(unsigned char) * bw * MI_UV_SIZE);
+  } else {
+    // Handles the special case where a filter that is not in the
+    // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
+    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+  }
 
-    cpi->rd_thresh_mult[best_mode_index] =
-        (cpi->rd_thresh_mult[best_mode_index] >=
-         (MIN_THRESHMULT + best_adjustment)) ?
-        cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
-    cpi->rd_threshes[best_mode_index] =
-        (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
-        cpi->rd_thresh_mult[best_mode_index];
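+  // A switchable filter has to be signalled per block, so account for its
+  // cost in the rate term.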
+  if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+    const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+    const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+    *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
   }
 
-  // This code forces Altref,0,0 and skip for the frame that overlays a
-  // an alrtef unless Altref is filtered. However, this is unsafe if
-  // segment level coding of ref frame is enabled for this
-  // segment.
-  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
-      cpi->is_src_frame_alt_ref &&
-      (cpi->oxcf.arnr_max_frames == 0) &&
-      (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
-    mbmi->mode = ZEROMV;
-    if (cm->txfm_mode <= ALLOW_8X8)
-      mbmi->txfm_size = cm->txfm_mode;
-    else
-      mbmi->txfm_size = TX_16X16;
-    mbmi->ref_frame = ALTREF_FRAME;
-    mbmi->mv[0].as_int = 0;
-    mbmi->uv_mode = DC_PRED;
-    mbmi->mb_skip_coeff = 1;
-    mbmi->partitioning = 0;
-    set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
-                      scale_factor);
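+  // Skip the block if the active map marks it inactive; otherwise try the
+  // encode_breakout early exit based on the prediction error.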
+  if (cpi->active_map_enabled && x->active_ptr[0] == 0)
+    x->skip = 1;
+  else if (x->encode_breakout) {
+    unsigned int var, sse;
+    int threshold = (xd->plane[0].dequant[1]
+                     * xd->plane[0].dequant[1] >> 4);
 
-    vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
-    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
-    goto end;
-  }
+    if (threshold < x->encode_breakout)
+      threshold = x->encode_breakout;
 
-  // macroblock modes
-  vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
-  if (best_mbmode.mode == I4X4_PRED) {
-    for (i = 0; i < 16; i++) {
-      xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
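+    // Measure the luma variance and SSE of the prediction against the source.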
+    if (bsize != BLOCK_SIZE_MB16X16) {
+      var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
+                                       x->plane[0].src.stride,
+                                       xd->plane[0].dst.buf,
+                                       xd->plane[0].dst.stride,
+                                       &sse);
+    } else {
+      var = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+                              xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                              &sse);
     }
-  }
 
-  if (best_mbmode.mode == I8X8_PRED)
-    set_i8x8_block_modes(x, mode8x8);
+    if ((int)sse < threshold) {
+      unsigned int q2dc = xd->plane[0].dequant[0];
+      /* If there is no codable 2nd order dc
+         or only a very small uniform pixel change */
+      if ((sse - var < q2dc * q2dc >> 4) ||
+          (sse / 2 > var && sse - var < 64)) {
+        // Check u and v to make sure skip is ok
+        int sse2;
 
-  if (best_mbmode.mode == SPLITMV) {
-    for (i = 0; i < 16; i++)
-      xd->mode_info_context->bmi[i].as_mv[0].as_int =
-          best_bmodes[i].as_mv[0].as_int;
-    if (mbmi->second_ref_frame > 0)
-      for (i = 0; i < 16; i++)
-        xd->mode_info_context->bmi[i].as_mv[1].as_int =
-            best_bmodes[i].as_mv[1].as_int;
+        if (bsize != BLOCK_SIZE_MB16X16) {
+          unsigned int sse2u, sse2v;
+          // FIXME(rbultje): mb predictors predict into xd->predictor
+          var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
+                                              x->plane[1].src.stride,
+                                              xd->plane[1].dst.buf,
+                                              xd->plane[1].dst.stride, &sse2u);
+          var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
+                                              x->plane[2].src.stride,
+                                              xd->plane[2].dst.buf,
+                                              xd->plane[2].dst.stride, &sse2v);
+          sse2 = sse2u + sse2v;
+        } else {
+          unsigned int sse2u, sse2v;
+          var = vp9_variance8x8(x->plane[1].src.buf, x->plane[1].src.stride,
+                                xd->plane[1].dst.buf, xd->plane[1].dst.stride,
+                                &sse2u);
+          var = vp9_variance8x8(x->plane[2].src.buf, x->plane[2].src.stride,
+                                xd->plane[2].dst.buf, xd->plane[2].dst.stride,
+                                &sse2v);
+          sse2 = sse2u + sse2v;
+        }
 
-    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
+        if (sse2 * 2 < threshold) {
+          x->skip = 1;
+          *distortion = sse + sse2;
+          *rate2 = 500;
 
-    mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int;
-    mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
-  }
+          /* for best_yrd calculation */
+          *rate_uv = 0;
+          *distortion_uv = sse2;
 
-  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
-    if (best_pred_rd[i] == INT64_MAX)
-      best_pred_diff[i] = INT_MIN;
-    else
-      best_pred_diff[i] = best_rd - best_pred_rd[i];
+          *disable_skip = 1;
+          this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
+        }
+      }
+    }
   }
 
   if (!x->skip) {
-    for (i = 0; i < NB_TXFM_MODES; i++) {
-      if (best_txfm_rd[i] == INT64_MAX)
-        best_txfm_diff[i] = 0;
-      else
-        best_txfm_diff[i] = best_rd - best_txfm_rd[i];
-    }
-  } else {
-    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
-  }
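+    // The block is not skipped: compute the full Y and UV rate/distortion and
+    // combine the per-plane skippable flags.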
+    int skippable_y, skippable_uv;
 
-end:
+    // Y cost and distortion
+    super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
+                    bsize, txfm_cache);
+    *rate2 += *rate_y;
+    *distortion += *distortion_y;
 
-  // Flag all modes that have a distortion thats > 2x the best we found at
-  // this level.
-  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
-    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV
-        || mode_index == SPLITMV)
-      continue;
+    super_block_uvrd(cm, x, rate_uv, distortion_uv,
+                     &skippable_uv, bsize);
 
-    if (mode_distortions[mode_index] > 2 * *returndistortion) {
-      x->mb_context[xd->sb_index][xd->mb_index].modes_with_high_error |= (1
-          << mode_index);
-    }
+    *rate2 += *rate_uv;
+    *distortion += *distortion_uv;
+    *skippable = skippable_y && skippable_uv;
   }
 
-  // Flag all ref frames that have a distortion thats > 2x the best we found at
-  // this level.
-  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
-    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
-      x->mb_context[xd->sb_index][xd->mb_index].frames_with_high_error |= (1
-          << ref_frame);
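+  // Exclude this mode if the frame-level compound-prediction setting rules
+  // out the current single/compound reference combination.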
+  if (!(*mode_excluded)) {
+    if (is_comp_pred) {
+      *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
+    } else {
+      *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
     }
+#if CONFIG_COMP_INTERINTRA_PRED
+    if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
+#endif
   }
 
-  set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
-                    scale_factor);
-  store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
-                       best_mode_index, &best_partition,
-                       &mbmi->ref_mvs[mbmi->ref_frame][0],
-                       &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
-                                      mbmi->second_ref_frame][0],
-                       best_pred_diff, best_txfm_diff);
+  return this_rd;  // if 0, this will be recalculated by the caller
 }
-#endif  // !CONFIG_SB8X8
 
 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                int *returnrate, int *returndist,
@@ -4432,30 +2366,24 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int dist_y = 0, dist_uv;
   int y_skip = 0, uv_skip;
   int64_t txfm_cache[NB_TXFM_MODES], err;
-#if CONFIG_SB8X8
   MB_PREDICTION_MODE mode;
   TX_SIZE txfm_size;
   int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y;
   int64_t err4x4 = INT64_MAX;
-#endif
   int i;
 
   ctx->skip = 0;
   xd->mode_info_context->mbmi.mode = DC_PRED;
   err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                &dist_y, &y_skip, bsize, txfm_cache);
-#if CONFIG_SB8X8
   mode = xd->mode_info_context->mbmi.mode;
   txfm_size = xd->mode_info_context->mbmi.txfm_size;
-#endif
   rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                           &dist_uv, &uv_skip, bsize);
-#if CONFIG_SB8X8
   if (bsize == BLOCK_SIZE_SB8X8)
     err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
                                        &rate4x4_y_tokenonly,
                                        &dist4x4_y, err);
-#endif
 
   if (y_skip && uv_skip) {
     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
@@ -4463,7 +2391,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     *returndist = dist_y + (dist_uv >> 2);
     memset(ctx->txfm_rd_diff, 0,
            sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff));
-#if CONFIG_SB8X8
     xd->mode_info_context->mbmi.mode = mode;
     xd->mode_info_context->mbmi.txfm_size = txfm_size;
   } else if (bsize == BLOCK_SIZE_SB8X8 && err4x4 < err) {
@@ -4474,156 +2401,20 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]);
     }
     xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-#endif
   } else {
     *returnrate = rate_y + rate_uv +
         vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
     *returndist = dist_y + (dist_uv >> 2);
     for (i = 0; i < NB_TXFM_MODES; i++) {
-#if CONFIG_SB8X8
       ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]);
-#else
-      ctx->txfm_rd_diff[i] = err - txfm_cache[i];
-#endif
     }
-#if CONFIG_SB8X8
     xd->mode_info_context->mbmi.txfm_size = txfm_size;
     xd->mode_info_context->mbmi.mode = mode;
-#endif
   }
 
   vpx_memcpy(&ctx->mic, xd->mode_info_context, sizeof(MODE_INFO));
 }
 
-#if !CONFIG_SB8X8
-void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
-                            int *returnrate, int *returndist) {
-  VP9_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
-  int64_t error4x4, error16x16;
-  int rate4x4, rate16x16 = 0, rateuv[2];
-  int dist4x4 = 0, dist16x16 = 0, distuv[2];
-  int rate;
-  int rate4x4_tokenonly = 0;
-  int rate16x16_tokenonly = 0;
-  int rateuv_tokenonly[2];
-  int64_t error8x8;
-  int rate8x8_tokenonly=0;
-  int rate8x8, dist8x8;
-  int mode16x16;
-  int mode8x8[4];
-  int dist;
-  int modeuv[2], uv_intra_skippable[2];
-  int y_intra16x16_skippable = 0;
-  int64_t txfm_cache[2][NB_TXFM_MODES];
-  TX_SIZE txfm_size_16x16, txfm_size_8x8;
-  int i;
-
-  x->mb_context[xd->sb_index][xd->mb_index].skip = 0;
-  mbmi->ref_frame = INTRA_FRAME;
-  mbmi->mode = DC_PRED;
-  for (i = 0; i <= TX_8X8; i++) {
-    mbmi->txfm_size = i;
-    rd_pick_intra_sbuv_mode(cpi, x, &rateuv[i], &rateuv_tokenonly[i],
-                            &distuv[i], &uv_intra_skippable[i],
-                            BLOCK_SIZE_MB16X16);
-    modeuv[i] = mbmi->uv_mode;
-  }
-
-  // current macroblock under rate-distortion optimization test loop
-  error16x16 = rd_pick_intra_sby_mode(cpi, x, &rate16x16,
-                                      &rate16x16_tokenonly, &dist16x16,
-                                      &y_intra16x16_skippable,
-                                      BLOCK_SIZE_MB16X16, txfm_cache[1]);
-  mode16x16 = mbmi->mode;
-  txfm_size_16x16 = mbmi->txfm_size;
-  if (y_intra16x16_skippable &&
-      ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) ||
-       (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) {
-    error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0);
-    rate16x16 -= rate16x16_tokenonly;
-  }
-  for (i = 0; i < NB_TXFM_MODES; i++) {
-    txfm_cache[0][i] = error16x16 - txfm_cache[1][cm->txfm_mode] +
-                       txfm_cache[1][i];
-  }
-
-  error8x8 = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate8x8,
-                                                &rate8x8_tokenonly,
-                                                &dist8x8, mode8x8,
-                                                error16x16, txfm_cache[1]);
-  txfm_size_8x8 = mbmi->txfm_size;
-  for (i = 0; i < NB_TXFM_MODES; i++) {
-    int64_t tmp_rd = error8x8 - txfm_cache[1][cm->txfm_mode] + txfm_cache[1][i];
-    if (tmp_rd < txfm_cache[0][i])
-      txfm_cache[0][i] = tmp_rd;
-  }
-
-  mbmi->txfm_size = TX_4X4;
-  error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
-                                       &rate4x4, &rate4x4_tokenonly,
-                                       &dist4x4, error16x16);
-  for (i = 0; i < NB_TXFM_MODES; i++) {
-    if (error4x4 < txfm_cache[0][i])
-      txfm_cache[0][i] = error4x4;
-  }
-
-  mbmi->mb_skip_coeff = 0;
-  if (y_intra16x16_skippable &&
-      ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) ||
-       (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) {
-    mbmi->mb_skip_coeff = 1;
-    mbmi->mode = mode16x16;
-    mbmi->uv_mode = modeuv[cm->txfm_mode != ONLY_4X4];
-    rate = rate16x16 + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
-    dist = dist16x16;
-    rate += rateuv[cm->txfm_mode != ONLY_4X4] -
-            rateuv_tokenonly[cm->txfm_mode != ONLY_4X4];
-    dist += (distuv[cm->txfm_mode != ONLY_4X4] >> 2);
-    mbmi->txfm_size = txfm_size_16x16;
-  } else if (error8x8 > error16x16) {
-    if (error4x4 < error16x16) {
-      rate = rateuv[TX_4X4] + rate4x4;
-      mbmi->mode = I4X4_PRED;
-      mbmi->txfm_size = TX_4X4;
-      dist = dist4x4 + (distuv[TX_4X4] >> 2);
-      mbmi->uv_mode = modeuv[TX_4X4];
-    } else {
-      mbmi->txfm_size = txfm_size_16x16;
-      mbmi->mode = mode16x16;
-      rate = rate16x16 + rateuv[mbmi->txfm_size != TX_4X4];
-      dist = dist16x16 + (distuv[mbmi->txfm_size != TX_4X4] >> 2);
-      mbmi->uv_mode = modeuv[mbmi->txfm_size != TX_4X4];
-    }
-    rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
-  } else {
-    if (error4x4 < error8x8) {
-      rate = rateuv[TX_4X4] + rate4x4;
-      mbmi->mode = I4X4_PRED;
-      mbmi->txfm_size = TX_4X4;
-      dist = dist4x4 + (distuv[TX_4X4] >> 2);
-      mbmi->uv_mode = modeuv[TX_4X4];
-    } else {
-      mbmi->mode = I8X8_PRED;
-      mbmi->txfm_size = txfm_size_8x8;
-      set_i8x8_block_modes(x, mode8x8);
-      rate = rate8x8 + rateuv[TX_4X4];
-      dist = dist8x8 + (distuv[TX_4X4] >> 2);
-    }
-    rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
-  }
-
-  for (i = 0; i < NB_TXFM_MODES; i++) {
-    x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
-        txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i];
-  }
-
-  *returnrate = rate;
-  *returndist = dist;
-}
-#endif
-
 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                   int mi_row, int mi_col,
                                   int *returnrate,
@@ -4679,20 +2470,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
   int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
                                              cpi->common.y_dc_delta_q);
-#if CONFIG_SB8X8
   int_mv seg_mvs[4][MAX_REF_FRAMES - 1];
   union b_mode_info best_bmodes[4];
   PARTITION_INFO best_partition;
-#endif
 
-#if CONFIG_SB8X8
   for (i = 0; i < 4; i++) {
     int j;
 
     for (j = 0; j < MAX_REF_FRAMES - 1; j++)
       seg_mvs[i][j].as_int = INVALID_MV;
   }
-#endif
   // Everywhere the flag is set the error is much higher than its neighbors.
   ctx->frames_with_high_error = 0;
   ctx->modes_with_high_error = 0;
@@ -4784,6 +2571,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     x->skip = 0;
     this_mode = vp9_mode_order[mode_index].mode;
     ref_frame = vp9_mode_order[mode_index].ref_frame;
+
     if (!(ref_frame == INTRA_FRAME
         || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
       continue;
@@ -4805,7 +2593,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->ref_frame = ref_frame;
     mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
 
-    // TODO(jingning): scaling not supported in SPLITMV mode.
+    // TODO(jingning, jkoleszar): scaling of the reference frame is not
+    // supported for SPLITMV.
     if (mbmi->ref_frame > 0 &&
           (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
            yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
@@ -4818,7 +2607,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         this_mode == SPLITMV)
       continue;
 
-
     set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
                       scale_factor);
     comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
@@ -4837,16 +2625,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     // if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
     //  continue;
 
-    if (
-#if CONFIG_SB8X8
-        bsize != BLOCK_SIZE_SB8X8 &&
-        (this_mode == I4X4_PRED || this_mode == SPLITMV)
-#else
-        this_mode == I4X4_PRED ||
-        this_mode == I8X8_PRED ||
-        this_mode == SPLITMV
-#endif
-        )
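+    // Sub-8x8 modes (I4X4_PRED, SPLITMV) are only evaluated for 8x8 blocks.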
+    if (bsize != BLOCK_SIZE_SB8X8 &&
+        (this_mode == I4X4_PRED || this_mode == SPLITMV))
       continue;
     //  if (vp9_mode_order[mode_index].second_ref_frame == INTRA_FRAME)
     //  continue;
@@ -4909,7 +2689,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       }
     }
 
-#if CONFIG_SB8X8
     if (this_mode == I4X4_PRED) {
       int rate;
 
@@ -4928,9 +2707,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       distortion2 += dist_uv[TX_4X4];
       distortion_uv = dist_uv[TX_4X4];
       mbmi->uv_mode = mode_uv[TX_4X4];
-    } else
-#endif
-    if (ref_frame == INTRA_FRAME) {
+    } else if (ref_frame == INTRA_FRAME) {
       TX_SIZE uv_tx;
       vp9_build_intra_predictors_sby_s(xd, bsize);
       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
@@ -4953,7 +2730,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
-#if CONFIG_SB8X8
     } else if (this_mode == SPLITMV) {
       const int is_comp_pred = mbmi->second_ref_frame > 0;
       int rate, distortion;
@@ -5082,7 +2858,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       compmode_cost =
           vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
       mbmi->mode = this_mode;
-#endif
     } else {
       YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
       int fb;
@@ -5239,7 +3014,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         *returndistortion = distortion2;
         best_rd = this_rd;
         vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
-#if CONFIG_SB8X8
         vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
 
         if (this_mode == I4X4_PRED || this_mode == SPLITMV) {
@@ -5247,7 +3021,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
             best_bmodes[i] = xd->mode_info_context->bmi[i];
           }
         }
-#endif
       }
 #if 0
       // Testing this mode gave rise to an improvement in best error score.
@@ -5389,22 +3162,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->mv[0].as_int = 0;
     mbmi->uv_mode = DC_PRED;
     mbmi->mb_skip_coeff = 1;
-#if !CONFIG_SB8X8
-    mbmi->partitioning = 0;
-#endif
     if (cm->txfm_mode == TX_MODE_SELECT) {
       if (bsize >= BLOCK_SIZE_SB32X32)
         mbmi->txfm_size = TX_32X32;
-#if CONFIG_SB8X8
       else if (bsize >= BLOCK_SIZE_MB16X16)
-#else
-      else
-#endif
         mbmi->txfm_size = TX_16X16;
-#if CONFIG_SB8X8
       else
         mbmi->txfm_size = TX_8X8;
-#endif
     }
 
     vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
@@ -5414,7 +3178,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
   // macroblock modes
   vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
-#if CONFIG_SB8X8
   if (best_mbmode.mode == I4X4_PRED) {
     for (i = 0; i < 4; i++) {
       xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
@@ -5435,7 +3198,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
     mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
   }
-#endif
+
   for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
     if (best_pred_rd[i] == INT64_MAX)
       best_pred_diff[i] = INT_MIN;
@@ -5458,11 +3221,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
                     scale_factor);
   store_coding_context(x, ctx, best_mode_index,
-#if CONFIG_SB8X8
                        &best_partition,
-#else
-                       NULL,
-#endif
                        &mbmi->ref_mvs[mbmi->ref_frame][0],
                        &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
                                       mbmi->second_ref_frame][0],
@@ -5470,42 +3229,3 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
   return best_rd;
 }
-
-#if !CONFIG_SB8X8
-void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
-                                    int mi_row, int mi_col,
-                                    int *totalrate, int *totaldist) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
-  int rate, distortion;
-  int64_t intra_error = 0;
-  unsigned char *segment_id = &mbmi->segment_id;
-
-  x->encode_breakout = xd->segmentation_enabled ?
-                         cpi->segment_encode_breakout[*segment_id] :
-                         cpi->oxcf.encode_breakout;
-
-  // if (cpi->sf.RD)
-  // For now this codebase is limited to a single rd encode path
-  {
-    int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
-
-    rd_pick_inter_mode(cpi, x, mi_row, mi_col, &rate,
-                       &distortion, &intra_error);
-
-    /* restore cpi->zbin_mode_boost_enabled */
-    cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
-  }
-  // else
-  // The non rd encode path has been deleted from this code base
-  // to simplify development
-  //    vp9_pick_inter_mode
-
-  // Store metrics so they can be added in to totals if this mode is picked
-  x->mb_context[xd->sb_index][xd->mb_index].distortion  = distortion;
-  x->mb_context[xd->sb_index][xd->mb_index].intra_error = intra_error;
-
-  *totalrate = rate;
-  *totaldist = distortion;
-}
-#endif
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 6533a82e011bd504754f299b66cb926e83a59e02..dcf5d00e9f04d97b3b8b49f4476e94815d3b6ace 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -19,21 +19,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex);
 
 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
 
-#if !CONFIG_SB8X8
-void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
-                            int *r, int *d);
-#endif
-
 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                int *r, int *d, BLOCK_SIZE_TYPE bsize,
                                PICK_MODE_CONTEXT *ctx);
 
-#if !CONFIG_SB8X8
-void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
-                                    int mi_row, int mi_col,
-                                    int *r, int *d);
-#endif
-
 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                   int mi_row, int mi_col,
                                   int *r, int *d, BLOCK_SIZE_TYPE bsize,
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index c53fff43bd792e321e4458bd0ba03b6237cee28e..fe995ad729fabff0d195d7ad1f33420f4e9c46ee 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -193,17 +193,11 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi,
     assert(bwl < bsl && bhl < bsl);
     if (bsize == BLOCK_SIZE_SB64X64) {
       subsize = BLOCK_SIZE_SB32X32;
-#if CONFIG_SB8X8
     } else if (bsize == BLOCK_SIZE_SB32X32) {
       subsize = BLOCK_SIZE_MB16X16;
     } else {
       assert(bsize == BLOCK_SIZE_MB16X16);
       subsize = BLOCK_SIZE_SB8X8;
-#else
-    } else {
-      assert(bsize == BLOCK_SIZE_SB32X32);
-      subsize = BLOCK_SIZE_MB16X16;
-#endif
     }
 
     for (n = 0; n < 4; n++) {
@@ -253,11 +247,11 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
     vp9_get_tile_col_offsets(cm, tile_col);
     mi_ptr = cm->mi + cm->cur_tile_mi_col_start;
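+    // Step through the tile in 64x64 superblock units (8 8x8 mode-info
+    // units per side).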
     for (mi_row = 0; mi_row < cm->mi_rows;
-         mi_row += (4 << CONFIG_SB8X8), mi_ptr += (4 << CONFIG_SB8X8) * mis) {
+         mi_row += 8, mi_ptr += 8 * mis) {
       mi = mi_ptr;
       for (mi_col = cm->cur_tile_mi_col_start;
            mi_col < cm->cur_tile_mi_col_end;
-           mi_col += (4 << CONFIG_SB8X8), mi += (4 << CONFIG_SB8X8)) {
+           mi_col += 8, mi += 8) {
         count_segs_sb(cpi, mi, no_pred_segcounts, temporal_predictor_count,
                       t_unpred_seg_counts, mi_row, mi_col, BLOCK_SIZE_SB64X64);
       }
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 7c2728bb5516106fead0e6dace655d9970adb832..9a6598581eea3525df95b622859d465c6ae188c7 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -375,9 +375,6 @@ int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
   int result = 1;
   struct is_skippable_args args = {xd, &result};
   foreach_transformed_block_in_plane(xd, bsize, 0,
-#if !CONFIG_SB8X8
-                                     0,
-#endif
                                      is_skippable, &args);
   return result;
 }