diff --git a/configure b/configure
index bfbe73d4f5f37bfab51add56d57d65b4fcd873a1..a198188ffc28fb32ef104bbabb8f132e2ef69000 100755
--- a/configure
+++ b/configure
@@ -224,6 +224,7 @@ EXPERIMENT_LIST="
     sixteenth_subpel_uv
     comp_intra_pred
     newentropy
+    superblocks
 "
 CONFIG_LIST="
     external_build
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 4504282f5c8c728046b8ea360d044e98723564e5..4563c8f7f469e0712fa34b5b034a1a4d730ea404 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -337,6 +337,9 @@ typedef struct MacroBlockD
 #if CONFIG_RUNTIME_CPU_DETECT
     struct VP8_COMMON_RTCD  *rtcd;
 #endif
+
+    int mb_index;   // Index of the MB in the SB (0..3)
+
 } MACROBLOCKD;
 
 
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index a741471064dc630e92cfe2a07929fa7e31179d06..955f54ad19c4adf4b2ec41321aaaedfe23d0cb08 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -81,7 +81,7 @@ void vp8_find_near_mvs
         else
             cnt[CNT_INTRA] += 2;
     }
-    /* Process above left or the one frome last frame */
+    /* Process above left or the one from last frame */
     if ( aboveleft->mbmi.ref_frame != INTRA_FRAME||
          (lf_here->mbmi.ref_frame==LAST_FRAME && refframe == LAST_FRAME))
     {
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index cc81c68837f383c2f85be51ef4bc290a43de1884..1c82f8ff8f18dbb77d591f9aae3a92994e29d65f 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -666,7 +666,6 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     const int mis = pbi->common.mode_info_stride;
     MACROBLOCKD *const xd  = & pbi->mb;
 
-    int index = mb_row * pbi->common.mb_cols + mb_col;
     int_mv *const mv = & mbmi->mv;
     int mb_to_left_edge;
     int mb_to_right_edge;
@@ -1017,91 +1016,75 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
 void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 {
-    MODE_INFO *mi = pbi->common.mi;
-
-    MODE_INFO *prev_mi = pbi->common.prev_mi;
+    int i;
+    VP8_COMMON *cm = &pbi->common;
+    MODE_INFO *mi = cm->mi;
+    int sb_row, sb_col;
+    int sb_rows = (cm->mb_rows + 1)>>1;
+    int sb_cols = (cm->mb_cols + 1)>>1;
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
 
-    int mb_row = -1;
-
-#if 0
-    FILE *statsfile;
-    statsfile = fopen("decsegmap.stt", "a");
-    fprintf(statsfile, "\n" );
-#endif
+    MODE_INFO *prev_mi = cm->prev_mi;
 
     mb_mode_mv_init(pbi);
 
 #if CONFIG_QIMODE
-    if(pbi->common.frame_type==KEY_FRAME && !pbi->common.kf_ymode_probs_update)
+    if(cm->frame_type==KEY_FRAME && !cm->kf_ymode_probs_update)
     {
-        pbi->common.kf_ymode_probs_index = vp8_read_literal(&pbi->bc, 3);
+        cm->kf_ymode_probs_index = vp8_read_literal(&pbi->bc, 3);
     }
 #endif
 
-    while (++mb_row < pbi->common.mb_rows)
+    for (sb_row=0; sb_row<sb_rows; sb_row++)
     {
-        int mb_col = -1;
-        int mb_to_top_edge;
-        int mb_to_bottom_edge;
-
-        pbi->mb.mb_to_top_edge =
-        mb_to_top_edge = -((mb_row * 16)) << 3;
-        mb_to_top_edge -= LEFT_TOP_MARGIN;
-
-        pbi->mb.mb_to_bottom_edge =
-        mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
-        mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+        int mb_col = 0;
+        int mb_row = (sb_row <<1);
 
-#if 0
-        fprintf(statsfile, "\n" );
-#endif
-
-        while (++mb_col < pbi->common.mb_cols)
+        for (sb_col=0; sb_col<sb_cols; sb_col++)
         {
-            /*read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
-            if(pbi->common.frame_type == KEY_FRAME)
-            {
-                //printf("<%d %d> \n", mb_row, mb_col);
-                vp8_kfread_modes(pbi, mi, mb_row, mb_col);
-            }
-            else
+            for ( i=0; i<4; i++ )
             {
-                read_mb_modes_mv(pbi, mi, &mi->mbmi,
-                prev_mi,
-                mb_row, mb_col);
-            }
+                int mb_to_top_edge;
+                int mb_to_bottom_edge;
 
-            //printf("%3d", mi->mbmi.mode);
+                int dy = row_delta[i];
+                int dx = col_delta[i];
+                int offset_extended = dy * cm->mode_info_stride + dx;
 
-            /*
-            if(pbi->common.current_video_frame==7)
-            {
-                FILE *fmode=fopen("kfmode.txt", "a");
-                fprintf(fmode, "%3d:%3d:%d\n",mb_row, mb_col, mi->mbmi.mode);
-                fclose(fmode);
+                if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols))
+                {
+                    /* MB lies outside frame, skip on to next */
+                    mb_row += dy;
+                    mb_col += dx;
+                    mi += offset_extended;
+                    prev_mi += offset_extended;
+                    continue;
+                }
 
-            }*/
-            /*
-            if(mi->mbmi.mode==I8X8_PRED)
-            {
-                printf("F%3d:%d:%d\n", pbi->common.current_video_frame, mb_row, mb_col);
-            }
-            */
-#if 0
-            fprintf(statsfile, "%2d%2d%2d   ",
-                mi->mbmi.segment_id, mi->mbmi.ref_frame, mi->mbmi.mode );
-#endif
-            prev_mi++;
-            mi++;       /* next macroblock */
-        }
-       // printf("\n");
-        prev_mi++;
-        mi++;           /* skip left predictor each row */
-    }
+                pbi->mb.mb_to_top_edge = mb_to_top_edge = -((mb_row * 16)) << 3;
+                                         mb_to_top_edge -= LEFT_TOP_MARGIN;
 
-#if 0
-    fclose(statsfile);
-#endif
+                pbi->mb.mb_to_bottom_edge =
+                mb_to_bottom_edge =
+                        ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
+                mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
 
+                if(cm->frame_type == KEY_FRAME)
+                    vp8_kfread_modes(pbi, mi, mb_row, mb_col);
+                else
+                    read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row,
+                                     mb_col);
+
+                /* next macroblock */
+                mb_row += dy;
+                mb_col += dx;
+                mi += offset_extended;
+                prev_mi += offset_extended;
+            }
+        }
 
+        mi += cm->mode_info_stride + (1 - (cm->mb_cols & 0x1));
+        prev_mi += cm->mode_info_stride + (1 - (cm->mb_cols & 0x1));
+    }
 }
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index c1e70a21254536c9d696690f376bf2a256fc60f5..ad7de109b63915593af66a4a64b4e4d8c70b92ab 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -240,8 +240,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
         }
 #endif
 
-
-
     if (xd->segmentation_enabled)
         mb_init_dequantizer(pbi, xd);
 
@@ -256,10 +254,14 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                 RECON_INVOKE(&pbi->common.rtcd.recon,
                     build_intra_predictors_mby)(xd);
             }
+#if 0
+            // Intra-modes requiring recon data from top-right
+            // MB have been temporarily disabled.
             else
             {
                 vp8_intra_prediction_down_copy(xd);
             }
+#endif
         }
     }
     else
@@ -431,7 +433,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                 (xd->qcoeff+16*16, xd->block[16].dequant,
                 xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                 xd->dst.uv_stride, xd->eobs+16);
-
 }
 
 
@@ -459,102 +460,139 @@ static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
 FILE *vpxlog = 0;
 #endif
 
+/* Decode a row of Superblocks (2x2 region of MBs) */
 static void
-decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd)
+decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd)
 {
+    int i;
+    int sb_col;
+    int mb_row, mb_col;
     int recon_yoffset, recon_uvoffset;
-    int mb_col;
     int ref_fb_idx = pc->lst_fb_idx;
     int dst_fb_idx = pc->new_fb_idx;
     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
+    int sb_cols = (pc->mb_cols + 1)>>1;
+    ENTROPY_CONTEXT_PLANES left_context[2];
 
-    vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
-    recon_yoffset = mb_row * recon_y_stride * 16;
-    recon_uvoffset = mb_row * recon_uv_stride * 8;
-    /* reset above block coeffs */
-
-    xd->above_context = pc->above_context;
-    xd->up_available = (mb_row != 0);
+    // For a SB there are 2 left contexts, one for each MB row within the SB
+    vpx_memset(left_context, 0, sizeof(left_context));
 
-    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+    mb_row = mbrow;
+    mb_col = 0;
 
-    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+    for (sb_col=0; sb_col<sb_cols; sb_col++)
     {
-        /* Distance of Mb to the various image edges.
-         * These are specified to 8th pel as they are always compared to values
-         * that are in 1/8th pel units
-         */
-        xd->mb_to_left_edge = -((mb_col * 16) << 3);
-        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+        // Process the 4 MBs within the SB in the order:
+        // top-left, top-right, bottom-left, bottom-right
+        for ( i=0; i<4; i++ )
+        {
+            int dy = row_delta[i];
+            int dx = col_delta[i];
+            int offset_extended = dy * xd->mode_info_stride + dx;
 
-        update_blockd_bmi(xd);
+            if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols))
+            {
+                // MB lies outside frame, skip on to next
+                mb_row += dy;
+                mb_col += dx;
+                xd->mode_info_context += offset_extended;
+                continue;
+            }
 
-        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
-        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
-        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+#ifdef DEC_DEBUG
+            dec_debug = (pc->current_video_frame==0 && mb_row==0 && mb_col==0);
+#endif
+            // Copy in the appropriate left context for this MB row
+            vpx_memcpy (&pc->left_context,
+                        &left_context[i>>1],
+                        sizeof(ENTROPY_CONTEXT_PLANES));
 
-        xd->left_available = (mb_col != 0);
+            // Set above context pointer
+            xd->above_context = pc->above_context + mb_col;
 
-        /* Select the appropriate reference frame for this MB */
-        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
-            ref_fb_idx = pc->lst_fb_idx;
-        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
-            ref_fb_idx = pc->gld_fb_idx;
-        else
-            ref_fb_idx = pc->alt_fb_idx;
+            /* Distance of Mb to the various image edges.
+             * These are specified to 8th pel as they are always compared to
+             * values that are in 1/8th pel units
+             */
+            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
 
-        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
-        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
-        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+            xd->mb_to_left_edge = -((mb_col * 16) << 3);
+            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
-        if (xd->mode_info_context->mbmi.second_ref_frame)
-        {
-            int second_ref_fb_idx;
+            xd->up_available = (mb_row != 0);
+            xd->left_available = (mb_col != 0);
+
+            update_blockd_bmi(xd);
+
+            recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+            recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
+            xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+            xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+            xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 
             /* Select the appropriate reference frame for this MB */
-            if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
-                second_ref_fb_idx = pc->lst_fb_idx;
-            else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
-                second_ref_fb_idx = pc->gld_fb_idx;
+            if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+                ref_fb_idx = pc->lst_fb_idx;
+            else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+                ref_fb_idx = pc->gld_fb_idx;
             else
-                second_ref_fb_idx = pc->alt_fb_idx;
+                ref_fb_idx = pc->alt_fb_idx;
 
-            xd->second_pre.y_buffer = pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
-            xd->second_pre.u_buffer = pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
-            xd->second_pre.v_buffer = pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
-        }
+            xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer +recon_yoffset;
+            xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer +recon_uvoffset;
+            xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer +recon_uvoffset;
 
-        if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
-        {
-            /* propagate errors from reference frames */
-            xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
-        }
+            if (xd->mode_info_context->mbmi.second_ref_frame)
+            {
+                int second_ref_fb_idx;
+
+                /* Select the appropriate reference frame for this MB */
+                if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
+                    second_ref_fb_idx = pc->lst_fb_idx;
+                else if (xd->mode_info_context->mbmi.second_ref_frame ==
+                                                                   GOLDEN_FRAME)
+                    second_ref_fb_idx = pc->gld_fb_idx;
+                else
+                    second_ref_fb_idx = pc->alt_fb_idx;
+
+                xd->second_pre.y_buffer =
+                       pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
+                xd->second_pre.u_buffer =
+                       pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
+                xd->second_pre.v_buffer =
+                       pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+            }
 
-#ifdef DEC_DEBUG
-        dec_debug = (pc->current_video_frame==1 && mb_row==4 && mb_col==0);
-#endif
-        decode_macroblock(pbi, xd, mb_row * pc->mb_cols  + mb_col);
+            if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
+            {
+                /* propagate errors from reference frames */
+                xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
+            }
 
-        /* check if the boolean decoder has suffered an error */
-        xd->corrupted |= vp8dx_bool_error(xd->current_bc);
-        recon_yoffset += 16;
-        recon_uvoffset += 8;
+            decode_macroblock(pbi, xd, mb_row * pc->mb_cols + mb_col);
 
-        ++xd->mode_info_context;  /* next mb */
+            /* check if the boolean decoder has suffered an error */
+            xd->corrupted |= vp8dx_bool_error(xd->current_bc);
 
-        xd->above_context++;
+            // Store the modified left context for the MB row locally
+            vpx_memcpy (&left_context[i>>1],
+                        &pc->left_context,
+                        sizeof(ENTROPY_CONTEXT_PLANES));
 
+            // skip to next MB
+            xd->mode_info_context += offset_extended;
+            mb_row += dy;
+            mb_col += dx;
+        }
     }
 
-    /* adjust to the next row of mbs */
-    vp8_extend_mb_row(
-        &pc->yv12_fb[dst_fb_idx],
-        xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
-    );
-
-    ++xd->mode_info_context;      /* skip prediction column */
+    /* skip the prediction column and move on to the next SB row */
+    xd->mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride;
 }
 
 static unsigned int read_partition_size(const unsigned char *cx_size)
@@ -797,8 +835,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
             if (Width != pc->Width  ||  Height != pc->Height)
             {
-                int prev_mb_rows = pc->mb_rows;
-
                 if (pc->Width <= 0)
                 {
                     pc->Width = Width;
@@ -1228,10 +1264,10 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     // Resset the macroblock mode info context to the start of the list
     xd->mode_info_context = pc->mi;
 
-    /* Decode a row of macro blocks */
-    for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
+    /* Decode a row of superblocks */
+    for (mb_row = 0; mb_row < pc->mb_rows; mb_row+=2)
     {
-        decode_mb_row(pbi, pc, mb_row, xd);
+        decode_sb_row(pbi, pc, mb_row, xd);
     }
     corrupt_tokens |= xd->corrupted;
 
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 96649b9e6a83689419c907170e7eedc92e097926..201585f8b1fc60c9f98a0958060ac764beb81f74 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -477,7 +477,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     }
 
 #if CONFIG_DEBUG
-    vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+    if(cm->show_frame)
+        vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
 #endif
 
     vp8_clear_system_state();
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 7288ead68095c657c540a6a81170853a7858d1dc..1ee6653c1b16e9972b851bfca9e5b5d938091a1a 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -577,6 +577,7 @@ static void update_ref_probs( VP8_COMP *const cpi )
 
 static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 {
+    int i;
     VP8_COMMON *const pc = & cpi->common;
     vp8_writer *const w = & cpi->bc;
     const MV_CONTEXT *mvc = pc->fc.mvc;
@@ -584,16 +585,12 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
     const MV_CONTEXT_HP *mvc_hp = pc->fc.mvc_hp;
 #endif
     MACROBLOCKD *xd = &cpi->mb.e_mbd;
-
-    int i;
-    int pred_context;
-
-
-    MODE_INFO *m = pc->mi;
-    MODE_INFO *prev_m = pc->prev_mi;
+    MODE_INFO *m;
+    MODE_INFO *prev_m;
 
     const int mis = pc->mode_info_stride;
-    int mb_row = -1;
+    int mb_row, mb_col;
+    int row, col;
 
 #if CONFIG_NEWENTROPY
     int prob_skip_false[3] = {0, 0, 0};
@@ -605,6 +602,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
     vp8_prob pred_prob;
     unsigned char prediction_flag;
 
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
+
     cpi->mb.partition_info = cpi->mb.pi;
 
     // Update the probabilities used to encode reference frame data
@@ -624,7 +624,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
             if ( (cpi->skip_false_count[k] + cpi->skip_true_count[k]) )
             {
                 prob_skip_false[k] = cpi->skip_false_count[k] * 256 /
-                                  (cpi->skip_false_count[k] + cpi->skip_true_count[k]);
+                (cpi->skip_false_count[k] + cpi->skip_true_count[k]);
 
                 if (prob_skip_false[k] <= 1)
                     prob_skip_false[k] = 1;
@@ -701,265 +701,310 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 #endif
     vp8_write_mvprobs(cpi);
 
-    while (++mb_row < pc->mb_rows)
+    mb_row = 0;
+    for (row=0; row < pc->mb_rows; row += 2)
     {
-        int mb_col = -1;
+        m = pc->mi + row * mis;
+        prev_m = pc->prev_mi + row * mis;
 
-        while (++mb_col < pc->mb_cols)
+        mb_col = 0;
+        for (col=0; col < pc->mb_cols; col += 2)
         {
-            const MB_MODE_INFO *const mi = & m->mbmi;
-            const MV_REFERENCE_FRAME rf = mi->ref_frame;
-            const MB_PREDICTION_MODE mode = mi->mode;
-            const int segment_id = mi->segment_id;
+            int i;
+
+            // Process the 4 MBs in the order:
+            // top-left, top-right, bottom-left, bottom-right
+            for (i=0; i<4; i++)
+            {
+                const MB_MODE_INFO *const mi = & m->mbmi;
+                const MV_REFERENCE_FRAME rf = mi->ref_frame;
+                const MB_PREDICTION_MODE mode = mi->mode;
+                const int segment_id = mi->segment_id;
 
-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
-            xd->mb_to_left_edge = -((mb_col * 16) << 3);
-            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+                int dy = row_delta[i];
+                int dx = col_delta[i];
+                int offset_extended = dy * mis + dx;
 
-            // Make sure the MacroBlockD mode info pointer is set correctly
-            xd->mode_info_context = m;
+                if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols))
+                {
+                    // MB lies outside frame, move on
+                    mb_row += dy;
+                    mb_col += dx;
+                    m += offset_extended;
+                    prev_m += offset_extended;
+                    cpi->mb.partition_info += offset_extended;
+                    continue;
+                }
+
+                // Distance of Mb to the various image edges.
+                // These are specified to 8th pel as they are always compared
+                // to MV values that are in 1/8th pel units
+                xd->mb_to_left_edge = -((mb_col * 16) << 3);
+                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+                xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
 
-            xd->prev_mode_info_context = prev_m;
+                // Make sure the MacroBlockD mode info pointer is set correctly
+                xd->mode_info_context = m;
+
+                xd->prev_mode_info_context = prev_m;
 
 #ifdef ENTROPY_STATS
-            active_section = 9;
+                active_section = 9;
 #endif
 
-            if (cpi->mb.e_mbd.update_mb_segmentation_map)
-            {
-                // Is temporal coding of the segment map enabled
-                if (pc->temporal_update)
+                if (cpi->mb.e_mbd.update_mb_segmentation_map)
                 {
-                    prediction_flag =
-                        get_pred_flag( xd, PRED_SEG_ID );
-                    pred_prob =
-                        get_pred_prob( pc, xd, PRED_SEG_ID);
+                    // Is temporal coding of the segment map enabled
+                    if (pc->temporal_update)
+                    {
+                        prediction_flag = get_pred_flag( xd, PRED_SEG_ID );
+                        pred_prob = get_pred_prob( pc, xd, PRED_SEG_ID);
 
-                    // Code the segment id prediction flag for this mb
-                    vp8_write( w, prediction_flag, pred_prob );
+                        // Code the segment id prediction flag for this mb
+                        vp8_write( w, prediction_flag, pred_prob );
 
-                    // If the mbs segment id was not predicted code explicitly
-                    if (!prediction_flag)
+                        // If the mb segment id wasn't predicted code explicitly
+                        if (!prediction_flag)
+                            write_mb_segid(w, mi, &cpi->mb.e_mbd);
+                    }
+                    else
+                    {
+                        // Normal unpredicted coding
                         write_mb_segid(w, mi, &cpi->mb.e_mbd);
+                    }
                 }
-                else
-                {
-                    // Normal undpredicted coding
-                    write_mb_segid(w, mi, &cpi->mb.e_mbd);
-                }
-            }
 
-            if ( pc->mb_no_coeff_skip &&
-                 ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
-                   ( get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0 ) ) )
-            {
+                if ( pc->mb_no_coeff_skip &&
+                     ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
+                       ( get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0 ) ) )
+                {
 #if CONFIG_NEWENTROPY
-                vp8_encode_bool(w, mi->mb_skip_coeff,
-                                get_pred_prob(pc, xd, PRED_MBSKIP));
+                    vp8_encode_bool(w, mi->mb_skip_coeff,
+                                    get_pred_prob(pc, xd, PRED_MBSKIP));
 #else
-                vp8_encode_bool(w, mi->mb_skip_coeff, prob_skip_false);
+                    vp8_encode_bool(w, mi->mb_skip_coeff, prob_skip_false);
 #endif
-            }
+                }
 
-            // Encode the reference frame.
-            encode_ref_frame( w, pc, xd,
-                              segment_id, rf );
+                // Encode the reference frame.
+                encode_ref_frame( w, pc, xd, segment_id, rf );
 
-            if (rf == INTRA_FRAME)
-            {
+                if (rf == INTRA_FRAME)
+                {
 #ifdef ENTROPY_STATS
-                active_section = 6;
+                    active_section = 6;
 #endif
 
-                if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
-                    write_ymode(w, mode, pc->fc.ymode_prob);
+                    if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
+                        write_ymode(w, mode, pc->fc.ymode_prob);
 
-                if (mode == B_PRED)
-                {
-                    int j = 0;
+                    if (mode == B_PRED)
+                    {
+                        int j = 0;
 #if CONFIG_COMP_INTRA_PRED
-                    int uses_second = m->bmi[0].as_mode.second != (B_PREDICTION_MODE) (B_DC_PRED - 1);
-                    vp8_write(w, uses_second, 128);
+                        int uses_second =
+                                m->bmi[0].as_mode.second !=
+                                        (B_PREDICTION_MODE) (B_DC_PRED - 1);
+                        vp8_write(w, uses_second, 128);
 #endif
-                    do {
+                        do {
 #if CONFIG_COMP_INTRA_PRED
-                        B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second;
+                            B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second;
 #endif
-                        write_bmode(w, m->bmi[j].as_mode.first, pc->fc.bmode_prob);
+                            write_bmode(w, m->bmi[j].as_mode.first,
+                                        pc->fc.bmode_prob);
 #if CONFIG_COMP_INTRA_PRED
-                        if (uses_second)
-                        {
-                            write_bmode(w, mode2, pc->fc.bmode_prob);
-                        }
+                            if (uses_second)
+                            {
+                                write_bmode(w, mode2, pc->fc.bmode_prob);
+                            }
 #endif
-                    } while (++j < 16);
-                }
-                if(mode == I8X8_PRED)
-                {
-                    write_i8x8_mode(w, m->bmi[0].as_mode.first, pc->i8x8_mode_prob);
-                    write_i8x8_mode(w, m->bmi[2].as_mode.first, pc->i8x8_mode_prob);
-                    write_i8x8_mode(w, m->bmi[8].as_mode.first, pc->i8x8_mode_prob);
-                    write_i8x8_mode(w, m->bmi[10].as_mode.first, pc->i8x8_mode_prob);
-                }
-                else
-                {
+                        } while (++j < 16);
+                    }
+                    if(mode == I8X8_PRED)
+                    {
+                        write_i8x8_mode(w, m->bmi[0].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                        write_i8x8_mode(w, m->bmi[2].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                        write_i8x8_mode(w, m->bmi[8].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                        write_i8x8_mode(w, m->bmi[10].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                    }
+                    else
+                    {
 #if CONFIG_UVINTRA
-                    write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob[mode]);
+                        write_uv_mode(w, mi->uv_mode,
+                                      pc->fc.uv_mode_prob[mode]);
 #ifdef MODE_STATS
-                    if(mode!=B_PRED)
-                        ++cpi->y_uv_mode_count[mode][mi->uv_mode];
+                        if(mode!=B_PRED)
+                            ++cpi->y_uv_mode_count[mode][mi->uv_mode];
 #endif
 
 #else
-                    write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob);
+                        write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob);
 #endif /*CONFIG_UVINTRA*/
 
+                    }
                 }
-            }
-            else
-            {
-                int_mv best_mv;
-                int ct[4];
+                else
+                {
+                    int_mv best_mv;
+                    int ct[4];
 
-                vp8_prob mv_ref_p [VP8_MVREFS-1];
+                    vp8_prob mv_ref_p [VP8_MVREFS-1];
 
-                {
-                    int_mv n1, n2;
+                    {
+                        int_mv n1, n2;
 
-                    vp8_find_near_mvs(xd, m,
-                        prev_m,
-                        &n1, &n2, &best_mv, ct, rf, cpi->common.ref_frame_sign_bias);
-                    vp8_mv_ref_probs(&cpi->common, mv_ref_p, ct);
+                        vp8_find_near_mvs(xd, m, prev_m, &n1, &n2, &best_mv, ct,
+                                          rf, cpi->common.ref_frame_sign_bias);
+                        vp8_mv_ref_probs(&cpi->common, mv_ref_p, ct);
 
 
 #ifdef ENTROPY_STATS
-                    accum_mv_refs(mode, ct);
+                        accum_mv_refs(mode, ct);
 #endif
-                }
+                    }
 
 #ifdef ENTROPY_STATS
-                active_section = 3;
+                    active_section = 3;
 #endif
 
-                // Is the segment coding of mode enabled
-                if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
-                {
-                    write_mv_ref(w, mode, mv_ref_p);
-                    vp8_accum_mv_refs(&cpi->common, mode, ct);
-                }
+                    // Is the segment coding of mode enabled
+                    if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
+                    {
+                        write_mv_ref(w, mode, mv_ref_p);
+                        vp8_accum_mv_refs(&cpi->common, mode, ct);
+                    }
 
-                {
-                    switch (mode)   /* new, split require MVs */
                     {
-                    case NEWMV:
+                        switch (mode)   /* new, split require MVs */
+                        {
+                        case NEWMV:
 #ifdef ENTROPY_STATS
-                        active_section = 5;
+                            active_section = 5;
 #endif
 
 #if CONFIG_HIGH_PRECISION_MV
-                        if (xd->allow_high_precision_mv)
-                            write_mv_hp(w, &mi->mv.as_mv, &best_mv, mvc_hp);
-                        else
+                            if (xd->allow_high_precision_mv)
+                                write_mv_hp(w, &mi->mv.as_mv, &best_mv, mvc_hp);
+                            else
 #endif
-                        write_mv(w, &mi->mv.as_mv, &best_mv, mvc);
+                            write_mv(w, &mi->mv.as_mv, &best_mv, mvc);
 
-                        if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
-                        {
-                            vp8_write(w, mi->second_ref_frame != INTRA_FRAME,
-                                      get_pred_prob( pc, xd, PRED_COMP ) );
-                        }
-                        if (mi->second_ref_frame)
-                        {
-                            const int second_rf = mi->second_ref_frame;
-                            int_mv n1, n2;
-                            int ct[4];
-                            vp8_find_near_mvs(xd, m,
+                            if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
+                            {
+                                vp8_write(w,
+                                          mi->second_ref_frame != INTRA_FRAME,
+                                          get_pred_prob( pc, xd, PRED_COMP ) );
+                            }
+                            if (mi->second_ref_frame)
+                            {
+                                const int second_rf = mi->second_ref_frame;
+                                int_mv n1, n2;
+                                int ct[4];
+                                vp8_find_near_mvs(xd, m,
                                               prev_m,
                                               &n1, &n2, &best_mv,
                                               ct, second_rf,
                                               cpi->common.ref_frame_sign_bias);
 #if CONFIG_HIGH_PRECISION_MV
-                            if (xd->allow_high_precision_mv)
-                                write_mv_hp(w, &mi->second_mv.as_mv, &best_mv, mvc_hp);
-                            else
+                                if (xd->allow_high_precision_mv)
+                                    write_mv_hp(w, &mi->second_mv.as_mv,
+                                                &best_mv, mvc_hp);
+                                else
 #endif
-                            write_mv(w, &mi->second_mv.as_mv, &best_mv, mvc);
-                        }
-                        break;
-                    case SPLITMV:
-                    {
-                        int j = 0;
+                                write_mv(w, &mi->second_mv.as_mv, &best_mv,
+                                         mvc);
+                            }
+                            break;
+                        case SPLITMV:
+                        {
+                            int j = 0;
 
 #ifdef MODE_STATS
-                        ++count_mb_seg [mi->partitioning];
+                            ++count_mb_seg [mi->partitioning];
 #endif
 
-                        write_split(w, mi->partitioning);
+                            write_split(w, mi->partitioning);
 
-                        do
-                        {
-                            B_PREDICTION_MODE blockmode;
-                            int_mv blockmv;
-                            const int *const  L = vp8_mbsplits [mi->partitioning];
-                            int k = -1;  /* first block in subset j */
-                            int mv_contz;
-                            int_mv leftmv, abovemv;
-
-                            blockmode =  cpi->mb.partition_info->bmi[j].mode;
-                            blockmv =  cpi->mb.partition_info->bmi[j].mv;
+                            do
+                            {
+                                B_PREDICTION_MODE blockmode;
+                                int_mv blockmv;
+                                const int *const  L =
+                                        vp8_mbsplits [mi->partitioning];
+                                int k = -1;  /* first block in subset j */
+                                int mv_contz;
+                                int_mv leftmv, abovemv;
+
+                                blockmode = cpi->mb.partition_info->bmi[j].mode;
+                                blockmv = cpi->mb.partition_info->bmi[j].mv;
 #if CONFIG_DEBUG
-                            while (j != L[++k])
-                                if (k >= 16)
-                                    assert(0);
+                                while (j != L[++k])
+                                    if (k >= 16)
+                                        assert(0);
 #else
-                            while (j != L[++k]);
+                                while (j != L[++k]);
 #endif
-                            leftmv.as_int = left_block_mv(m, k);
-                            abovemv.as_int = above_block_mv(m, k, mis);
-                            mv_contz = vp8_mv_cont(&leftmv, &abovemv);
+                                leftmv.as_int = left_block_mv(m, k);
+                                abovemv.as_int = above_block_mv(m, k, mis);
+                                mv_contz = vp8_mv_cont(&leftmv, &abovemv);
 
-                            write_sub_mv_ref(w, blockmode, vp8_sub_mv_ref_prob2 [mv_contz]);
+                                write_sub_mv_ref(w, blockmode,
+                                               vp8_sub_mv_ref_prob2 [mv_contz]);
 
-                            if (blockmode == NEW4X4)
-                            {
+                                if (blockmode == NEW4X4)
+                                {
 #ifdef ENTROPY_STATS
-                                active_section = 11;
+                                    active_section = 11;
 #endif
 #if CONFIG_HIGH_PRECISION_MV
-                                if (xd->allow_high_precision_mv)
-                                    write_mv_hp(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT_HP *) mvc_hp);
-                                else
+                                    if (xd->allow_high_precision_mv)
+                                        write_mv_hp(w, &blockmv.as_mv, &best_mv,
+                                                (const MV_CONTEXT_HP *) mvc_hp);
+                                    else
 #endif
-                                write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
+                                    write_mv(w, &blockmv.as_mv, &best_mv,
+                                             (const MV_CONTEXT *) mvc);
+                                }
                             }
-                        }
-                        while (++j < cpi->mb.partition_info->count);
-                    }
-                    break;
-                    default:
-                        if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
-                        {
-                            vp8_write(w, mi->second_ref_frame != INTRA_FRAME,
-                                      get_pred_prob( pc, xd, PRED_COMP ) );
+                            while (++j < cpi->mb.partition_info->count);
                         }
                         break;
+                        default:
+                            if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
+                            {
+                                vp8_write(w,
+                                          mi->second_ref_frame != INTRA_FRAME,
+                                          get_pred_prob( pc, xd, PRED_COMP ) );
+                            }
+                            break;
+                        }
                     }
                 }
-            }
 
-            ++m;
-            ++prev_m;
-            assert((prev_m-cpi->common.prev_mip)==(m-cpi->common.mip));
-            assert((prev_m-cpi->common.prev_mi)==(m-cpi->common.mi));
-            cpi->mb.partition_info++;
+                // Next MB
+                mb_row += dy;
+                mb_col += dx;
+                m += offset_extended;
+                prev_m += offset_extended;
+                cpi->mb.partition_info += offset_extended;
+#if CONFIG_DEBUG
+                assert((prev_m-cpi->common.prev_mip)==(m-cpi->common.mip));
+                assert((prev_m-cpi->common.prev_mi)==(m-cpi->common.mi));
+#endif
+            }
         }
 
-        ++m;  /* skip L prediction border */
-        ++prev_m;
-        cpi->mb.partition_info++;
+        // Next SB
+        mb_row += 2;
+        m += mis + (1 - (pc->mb_cols & 0x1));
+        prev_m += mis + (1 - (pc->mb_cols & 0x1));
+        cpi->mb.partition_info += mis + (1 - (pc->mb_cols & 0x1));
     }
 }
 
@@ -968,14 +1013,17 @@ static void write_kfmodes(VP8_COMP *cpi)
     vp8_writer *const bc = & cpi->bc;
     VP8_COMMON *const c = & cpi->common;
     const int mis = c->mode_info_stride;
-    /* const */
-    MODE_INFO *m = c->mi;
-    int mb_row = -1;
+    MODE_INFO *m;
+    int i;
+    int row, col;
+    int mb_row, mb_col;
 #if CONFIG_NEWENTROPY
     int prob_skip_false[3] = {0, 0, 0};
 #else
     int prob_skip_false = 0;
 #endif
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
 
     MACROBLOCKD *xd = &cpi->mb.e_mbd;
 
@@ -1030,87 +1078,115 @@ static void write_kfmodes(VP8_COMP *cpi)
     }
 #endif
 
-    while (++mb_row < c->mb_rows)
+    mb_row = 0;
+    for (row=0; row < c->mb_rows; row += 2)
     {
-        int mb_col = -1;
+        m = c->mi + row * mis;
 
-        while (++mb_col < c->mb_cols)
+        mb_col = 0;
+        for (col=0; col < c->mb_cols; col += 2)
         {
-            const int ym = m->mbmi.mode;
-            int segment_id = m->mbmi.segment_id;
-
-            xd->mode_info_context = m;
-
-            if (cpi->mb.e_mbd.update_mb_segmentation_map)
+            // Process the 4 MBs in the order:
+            // top-left, top-right, bottom-left, bottom-right
+            for (i=0; i<4; i++)
             {
-                write_mb_segid(bc, &m->mbmi, &cpi->mb.e_mbd);
-            }
+                int ym;
+                int segment_id;
+                int dy = row_delta[i];
+                int dx = col_delta[i];
+                int offset_extended = dy * mis + dx;
 
-            if ( c->mb_no_coeff_skip &&
-                 ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
-                   (get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0) ) )
-            {
-#if CONFIG_NEWENTROPY
-                vp8_encode_bool(bc, m->mbmi.mb_skip_coeff,
-                                get_pred_prob(c, xd, PRED_MBSKIP));
-#else
-                vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
-#endif
-            }
-#if CONFIG_QIMODE
-            kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]);
-#else
-            kfwrite_ymode(bc, ym, c->kf_ymode_prob);
-#endif
-            if (ym == B_PRED)
-            {
-                const int mis = c->mode_info_stride;
-                int i = 0;
-#if CONFIG_COMP_INTRA_PRED
-                int uses_second = m->bmi[0].as_mode.second != (B_PREDICTION_MODE) (B_DC_PRED - 1);
-                vp8_write(bc, uses_second, 128);
-#endif
-                do
+                if ((mb_row >= c->mb_rows) || (mb_col >= c->mb_cols))
                 {
-                    const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
-                    const B_PREDICTION_MODE L = left_block_mode(m, i);
-                    const int bm = m->bmi[i].as_mode.first;
-#if CONFIG_COMP_INTRA_PRED
-                    const int bm2 = m->bmi[i].as_mode.second;
-#endif
+                    // MB lies outside frame, move on
+                    mb_row += dy;
+                    mb_col += dx;
+                    m += offset_extended;
+                    continue;
+                }
 
-#ifdef ENTROPY_STATS
-                    ++intra_mode_stats [A] [L] [bm];
-#endif
+                ym = m->mbmi.mode;
+                segment_id = m->mbmi.segment_id;
 
-                    write_bmode(bc, bm, c->kf_bmode_prob [A] [L]);
-#if CONFIG_COMP_INTRA_PRED
-                    if (uses_second)
+                if (cpi->mb.e_mbd.update_mb_segmentation_map)
+                {
+                    write_mb_segid(bc, &m->mbmi, &cpi->mb.e_mbd);
+                }
+
+                if ( c->mb_no_coeff_skip &&
+                     ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
+                       (get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0) ) )
+                {
+    #if CONFIG_NEWENTROPY
+                    vp8_encode_bool(bc, m->mbmi.mb_skip_coeff,
+                                    get_pred_prob(c, xd, PRED_MBSKIP));
+    #else
+                    vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
+    #endif
+                }
+    #if CONFIG_QIMODE
+                kfwrite_ymode(bc, ym,
+                              c->kf_ymode_prob[c->kf_ymode_probs_index]);
+    #else
+                kfwrite_ymode(bc, ym, c->kf_ymode_prob);
+    #endif
+                if (ym == B_PRED)
+                {
+                    const int mis = c->mode_info_stride;
+                    int i = 0;
+    #if CONFIG_COMP_INTRA_PRED
+                    int uses_second =
+                            m->bmi[0].as_mode.second !=
+                                    (B_PREDICTION_MODE) (B_DC_PRED - 1);
+                    vp8_write(bc, uses_second, 128);
+    #endif
+                    do
                     {
-                        write_bmode(bc, bm2, c->kf_bmode_prob [A] [L]);
+                        const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
+                        const B_PREDICTION_MODE L = left_block_mode(m, i);
+                        const int bm = m->bmi[i].as_mode.first;
+    #if CONFIG_COMP_INTRA_PRED
+                        const int bm2 = m->bmi[i].as_mode.second;
+    #endif
+
+    #ifdef ENTROPY_STATS
+                        ++intra_mode_stats [A] [L] [bm];
+    #endif
+
+                        write_bmode(bc, bm, c->kf_bmode_prob [A] [L]);
+    #if CONFIG_COMP_INTRA_PRED
+                        if (uses_second)
+                        {
+                            write_bmode(bc, bm2, c->kf_bmode_prob [A] [L]);
+                        }
+    #endif
                     }
-#endif
+                    while (++i < 16);
                 }
-                while (++i < 16);
-            }
-            if(ym == I8X8_PRED)
-            {
-                write_i8x8_mode(bc, m->bmi[0].as_mode.first, c->i8x8_mode_prob);
-                write_i8x8_mode(bc, m->bmi[2].as_mode.first, c->i8x8_mode_prob);
-                write_i8x8_mode(bc, m->bmi[8].as_mode.first, c->i8x8_mode_prob);
-                write_i8x8_mode(bc, m->bmi[10].as_mode.first, c->i8x8_mode_prob);
-            }
-            else
+                if(ym == I8X8_PRED)
+                {
+                    write_i8x8_mode(bc, m->bmi[0].as_mode.first,
+                                    c->i8x8_mode_prob);
+                    write_i8x8_mode(bc, m->bmi[2].as_mode.first,
+                                    c->i8x8_mode_prob);
+                    write_i8x8_mode(bc, m->bmi[8].as_mode.first,
+                                    c->i8x8_mode_prob);
+                    write_i8x8_mode(bc, m->bmi[10].as_mode.first,
+                                    c->i8x8_mode_prob);
+                }
+                else
 #if CONFIG_UVINTRA
-                write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
+                    write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
 #else
-                write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob);
+                    write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob);
 #endif
-
-            m++;
+                // Next MB
+                mb_row += dy;
+                mb_col += dx;
+                m += offset_extended;
+            }
         }
-        //printf("\n");
-        m++;    // skip L prediction border
+        mb_row += 2;
     }
 }
 
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 25d2398cec47ce32e44798a2ceec5a3d6e64083a..bc40af5ee23b570c4a6fb2029c8c6606a8999151 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -61,14 +61,31 @@ typedef struct
     } bmi[16];
 } PARTITION_INFO;
 
+// Structure to hold snapshot of coding context during the mode picking process
+// TODO Do we need all of these?
 typedef struct
 {
-    DECLARE_ALIGNED(16, short, src_diff[400]);       // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
+    MODE_INFO mic;
+    PARTITION_INFO partition_info;
+    int_mv best_ref_mv;
+    int rate;
+    int distortion;
+    int intra_error;
+    int best_mode_index;
+    int rddiv;
+    int rdmult;
+
+} PICK_MODE_CONTEXT;
+
+typedef struct
+{
+    DECLARE_ALIGNED(16, short, src_diff[400]);  // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
     DECLARE_ALIGNED(16, short, coeff[400]);     // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
-    DECLARE_ALIGNED(16, unsigned char, thismb[256]);
+    DECLARE_ALIGNED(16, unsigned char, thismb[256]);    // 16x16 Y
 
     unsigned char *thismb_ptr;
-    // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries
+    // 16 Y blocks, 4 U blocks, 4 V blocks,
+    // 1 DC 2nd order block each with 16 entries
     BLOCK block[25];
 
     YV12_BUFFER_CONFIG src;
@@ -113,7 +130,6 @@ typedef struct
     int mv_row_min;
     int mv_row_max;
 
-    int vector_range;    // Used to monitor limiting range of recent vectors to guide search.
     int skip;
 
     int encode_breakout;
@@ -135,6 +151,16 @@ typedef struct
     int optimize;
     int q_index;
 
+    int encode_as_sb;
+
+    // Structure to hold context for each of the 4 MBs within a SB:
+    // when encoded as 4 independent MBs:
+    PICK_MODE_CONTEXT mb_context[4];
+#if CONFIG_SUPERBLOCKS
+    // when 4 MBs share coding parameters:
+    PICK_MODE_CONTEXT sb_context[4];
+#endif
+
     void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
     void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
     void (*short_walsh4x4)(short *input, short *output, int pitch);
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b9b100b8f5b1f1269ceeb02167c0741a899b38a0..e742809c8e2ff1d02715c681ea3021366b63cf48 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -56,10 +56,16 @@ extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                       MB_ROW_COMP *mbr_ei,
                                       int mb_row,
                                       int count);
+extern int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
+                                            int recon_yoffset,
+                                            int recon_uvoffset);
 void vp8_build_block_offsets(MACROBLOCK *x);
 void vp8_setup_block_ptrs(MACROBLOCK *x);
-int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
-int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
+void vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
+                                   int recon_yoffset, int recon_uvoffset,
+                                   int output_enabled);
+void vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x,
+                                    TOKENEXTRA **t, int output_enabled);
 static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x );
 
 
@@ -315,7 +321,9 @@ static void build_activity_map( VP8_COMP *cpi )
             recon_yoffset += 16;
 #endif
             //Copy current mb to a buffer
-            RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+            RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer,
+                                                      x->src.y_stride,
+                                                      x->thismb, 16);
 
             // measure activity
             mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col );
@@ -380,80 +388,406 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
     adjust_act_zbin(cpi, x);
 }
 
-static
-void encode_mb_row(VP8_COMP *cpi,
-                   VP8_COMMON *cm,
-                   int mb_row,
-                   MACROBLOCK  *x,
-                   MACROBLOCKD *xd,
-                   TOKENEXTRA **tp,
-                   int *totalrate)
+static void update_state (VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx)
 {
+    int i;
+    MACROBLOCKD *xd = &x->e_mbd;
+    MODE_INFO *mi = &ctx->mic;
+    int mb_mode = mi->mbmi.mode;
+    int mb_mode_index = ctx->best_mode_index;
+
+#if CONFIG_DEBUG
+    assert (mb_mode < MB_MODE_COUNT);
+    assert (mb_mode_index < MAX_MODES);
+    assert (mi->mbmi.ref_frame < MAX_REF_FRAMES);
+#endif
+
+    // Restore the coding context of the MB to that which was in place
+    // when the mode was picked for it
+    vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO));
+
+    if (mb_mode == B_PRED)
+    {
+        for (i = 0; i < 16; i++)
+        {
+            xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode;
+            assert (xd->block[i].bmi.as_mode.first < MB_MODE_COUNT);
+        }
+    }
+    else if (mb_mode == I8X8_PRED)
+    {
+        for (i = 0; i < 16; i++)
+        {
+            xd->block[i].bmi = xd->mode_info_context->bmi[i];
+        }
+    }
+    else if (mb_mode == SPLITMV)
+    {
+        vpx_memcpy(x->partition_info, &ctx->partition_info,
+                   sizeof(PARTITION_INFO));
+
+        xd->mode_info_context->mbmi.mv.as_int =
+                                      x->partition_info->bmi[15].mv.as_int;
+    }
+
+    if (cpi->common.frame_type == KEY_FRAME)
+    {
+        // Restore the coding modes to that held in the coding context
+        //if (mb_mode == B_PRED)
+        //    for (i = 0; i < 16; i++)
+        //    {
+        //        xd->block[i].bmi.as_mode =
+        //                          xd->mode_info_context->bmi[i].as_mode;
+        //        assert(xd->mode_info_context->bmi[i].as_mode < MB_MODE_COUNT);
+        //    }
+    }
+    else
+    {
+/*
+        // Reduce the activation RD thresholds for the best choice mode
+        if ((cpi->rd_baseline_thresh[mb_mode_index] > 0) &&
+            (cpi->rd_baseline_thresh[mb_mode_index] < (INT_MAX >> 2)))
+        {
+            int best_adjustment = (cpi->rd_thresh_mult[mb_mode_index] >> 2);
+
+            cpi->rd_thresh_mult[mb_mode_index] =
+                    (cpi->rd_thresh_mult[mb_mode_index]
+                     >= (MIN_THRESHMULT + best_adjustment)) ?
+                            cpi->rd_thresh_mult[mb_mode_index] - best_adjustment :
+                            MIN_THRESHMULT;
+            cpi->rd_threshes[mb_mode_index] =
+                    (cpi->rd_baseline_thresh[mb_mode_index] >> 7)
+                    * cpi->rd_thresh_mult[mb_mode_index];
+
+        }
+*/
+        // Note how often each mode is chosen as best
+        cpi->mode_chosen_counts[mb_mode_index]++;
+
+        rd_update_mvcount(cpi, x, &ctx->best_ref_mv);
+
+        cpi->prediction_error += ctx->distortion;
+        cpi->intra_error += ctx->intra_error;
+    }
+}
+
+static void pick_mb_modes (VP8_COMP *cpi,
+                           VP8_COMMON *cm,
+                           int mb_row,
+                           int mb_col,
+                           MACROBLOCK  *x,
+                           MACROBLOCKD *xd,
+                           TOKENEXTRA **tp,
+                           int *totalrate)
+{
+    int i;
+    int map_index;
     int recon_yoffset, recon_uvoffset;
-    int mb_col;
     int ref_fb_idx = cm->lst_fb_idx;
     int dst_fb_idx = cm->new_fb_idx;
     int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
     int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
-    int map_index = (mb_row * cpi->common.mb_cols);
+    ENTROPY_CONTEXT_PLANES left_context[2];
+    ENTROPY_CONTEXT_PLANES above_context[2];
+    ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
+                                                        + mb_col;
+
+    // Offsets to move pointers from MB to MB within a SB in raster order
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
+
+    /* Function should not modify L & A contexts; save and restore on exit */
+    vpx_memcpy (left_context,
+                cpi->left_context,
+                sizeof(left_context));
+    vpx_memcpy (above_context,
+                initial_above_context_ptr,
+                sizeof(above_context));
+
+    /* Encode MBs in raster order within the SB */
+    for ( i=0; i<4; i++ )
+    {
+        int dy = row_delta[i];
+        int dx = col_delta[i];
+        int offset_unextended = dy * cm->mb_cols + dx;
+        int offset_extended   = dy * xd->mode_info_stride + dx;
+
+        // TODO Many of the index items here can be computed more efficiently!
+
+        if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols))
+        {
+            // MB lies outside frame, move on
+            mb_row += dy;
+            mb_col += dx;
+
+            // Update pointers
+            x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+            x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+            x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
+
+            x->gf_active_ptr += offset_unextended;
+            x->partition_info += offset_extended;
+            xd->mode_info_context += offset_extended;
+            xd->prev_mode_info_context += offset_extended;
+#if CONFIG_DEBUG
+            assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+                   (xd->mode_info_context - cpi->common.mip));
+#endif
+            continue;
+        }
+
+        // Index of the MB in the SB 0..3
+        xd->mb_index = i;
+
+        map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+        x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+        // set above context pointer
+        xd->above_context = cm->above_context + mb_col;
+
+        // Restore the appropriate left context depending on which
+        // row in the SB the MB is situated
+        vpx_memcpy (&cm->left_context,
+                    &cpi->left_context[i>>1],
+                    sizeof(ENTROPY_CONTEXT_PLANES));
+
+        // Set up distance of MB to edge of frame in 1/8th pel units
+        xd->mb_to_top_edge    = -((mb_row * 16) << 3);
+        xd->mb_to_left_edge   = -((mb_col * 16) << 3);
+        xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+        xd->mb_to_right_edge  = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+
+        // Set up limit values for MV components to prevent them from
+        // extending beyond the UMV borders assuming 16x16 block size
+        x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+                            + (VP8BORDERINPIXELS - 16);
+        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+                            + (VP8BORDERINPIXELS - 16);
+
+        xd->up_available   = (mb_row != 0);
+        xd->left_available = (mb_col != 0);
 
-    // Reset the left context
-    vp8_zero(cm->left_context)
+        recon_yoffset  = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+        recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col *  8);
 
-    // reset above block coeffs
-    xd->above_context = cm->above_context;
+        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 
-    xd->up_available = (mb_row != 0);
-    recon_yoffset = (mb_row * recon_y_stride * 16);
-    recon_uvoffset = (mb_row * recon_uv_stride * 8);
+        // Copy current MB to a work buffer
+        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer,
+                                                  x->src.y_stride,
+                                                  x->thismb, 16);
 
-    cpi->tplist[mb_row].start = *tp;
-    //printf("Main mb_row = %d\n", mb_row);
+        x->rddiv = cpi->RDDIV;
+        x->rdmult = cpi->RDMULT;
 
-    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
-    // units as they are always compared to values that are in 1/8th pel units
-    xd->mb_to_top_edge = -((mb_row * 16) << 3);
-    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+        if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+            vp8_activity_masking(cpi, x);
 
-    // Set up limit values for vertical motion vector components
-    // to prevent them extending beyond the UMV borders
-    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
-    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
-                        + (VP8BORDERINPIXELS - 16);
+        // Is segmentation enabled
+        if (xd->segmentation_enabled)
+        {
+            // Set the segment id in xd->mode_info_context->mbmi.segment_id
+            if (cpi->segmentation_map[map_index] <= 3)
+                xd->mode_info_context->mbmi.segment_id =
+                              cpi->segmentation_map[map_index];
+            else
+                xd->mode_info_context->mbmi.segment_id = 0;
 
-    // Set the mb activity pointer to the start of the row.
-    x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+            vp8cx_mb_init_quantizer(cpi, x);
+        }
+        else
+            // Set to Segment 0 by default
+            xd->mode_info_context->mbmi.segment_id = 0;
+
+        x->active_ptr = cpi->active_map + map_index;
+
+        /* force 4x4 transform for mode selection */
+        xd->mode_info_context->mbmi.txfm_size = TX_4X4; // TODO IS this right??
 
-    // for each macroblock col in image
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+        cpi->update_context = 0;    // TODO Do we need this now??
+
+        // Find best coding mode & reconstruct the MB so it is available
+        // as a predictor for MBs that follow in the SB
+        if (cm->frame_type == KEY_FRAME)
+        {
+            *totalrate += vp8_rd_pick_intra_mode(cpi, x);
+
+            // Save the coding context
+            vpx_memcpy (&x->mb_context[i].mic, xd->mode_info_context,
+                        sizeof(MODE_INFO));
+
+            // Dummy encode, do not do the tokenization
+            vp8cx_encode_intra_macro_block(cpi, x, tp, 0);
+            //Note the encoder may have changed the segment_id
+        }
+        else
+        {
+            *totalrate += vp8cx_pick_mode_inter_macroblock(cpi, x,
+                                                           recon_yoffset,
+                                                           recon_uvoffset);
+
+            // Dummy encode, do not do the tokenization
+            vp8cx_encode_inter_macroblock(cpi, x, tp,
+                                         recon_yoffset, recon_uvoffset, 0);
+        }
+
+        // Keep a copy of the updated left context
+        vpx_memcpy (&cpi->left_context[i>>1],
+                    &cm->left_context,
+                    sizeof(ENTROPY_CONTEXT_PLANES));
+
+        // Next MB
+        mb_row += dy;
+        mb_col += dx;
+
+        x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+        x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+        x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
+
+        x->gf_active_ptr += offset_unextended;
+        x->partition_info += offset_extended;
+        xd->mode_info_context += offset_extended;
+        xd->prev_mode_info_context += offset_extended;
+
+#if CONFIG_DEBUG
+        assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+               (xd->mode_info_context - cpi->common.mip));
+#endif
+    }
+
+    /* Restore L & A coding context to those in place on entry */
+    vpx_memcpy (cpi->left_context,
+                left_context,
+                sizeof(left_context));
+    vpx_memcpy (initial_above_context_ptr,
+                above_context,
+                sizeof(above_context));
+}
+
+static void encode_sb ( VP8_COMP *cpi,
+                        VP8_COMMON *cm,
+                        int mbrow,
+                        int mbcol,
+                        MACROBLOCK  *x,
+                        MACROBLOCKD *xd,
+                        TOKENEXTRA **tp )
+{
+    int i, j;
+    int map_index;
+    int mb_row, mb_col;
+    int recon_yoffset, recon_uvoffset;
+    int ref_fb_idx = cm->lst_fb_idx;
+    int dst_fb_idx = cm->new_fb_idx;
+    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
+
+    mb_row = mbrow;
+    mb_col = mbcol;
+
+    /* Encode MBs in raster order within the SB */
+    for ( i=0; i<4; i++ )
     {
+        int dy = row_delta[i];
+        int dx = col_delta[i];
+        int offset_extended   = dy * xd->mode_info_stride + dx;
+        int offset_unextended = dy * cm->mb_cols + dx;
+
+        if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols))
+        {
+            // MB lies outside frame, move on
+            mb_row += dy;
+            mb_col += dx;
+
+            x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+            x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+            x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
+
+            x->gf_active_ptr      += offset_unextended;
+            x->partition_info     += offset_extended;
+            xd->mode_info_context += offset_extended;
+            xd->prev_mode_info_context += offset_extended;
+
+#if CONFIG_DEBUG
+            assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+                   (xd->mode_info_context - cpi->common.mip));
+#endif
+            continue;
+        }
+
+        xd->mb_index = i;
+
 #ifdef ENC_DEBUG
-        enc_debug = (cpi->common.current_video_frame ==1 && mb_row==4 && mb_col==0);
+        enc_debug = (cpi->common.current_video_frame == 0 &&
+                     mb_row==0 && mb_col==0);
         mb_col_debug=mb_col;
         mb_row_debug=mb_row;
 #endif
-        // Distance of Mb to the left & right edges, specified in
-        // 1/8th pel units as they are always compared to values
-        // that are in 1/8th pel units
-        xd->mb_to_left_edge = -((mb_col * 16) << 3);
-        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
-
-        // Set up limit values for horizontal motion vector components
-        // to prevent them extending beyond the UMV borders
+
+        // Restore MB state to that when it was picked
+#if CONFIG_SUPERBLOCKS
+        if (x->encode_as_sb)
+            update_state (cpi, x, &x->sb_context[i]);
+        else
+#endif
+            update_state (cpi, x, &x->mb_context[i]);
+
+        // Copy in the appropriate left context
+        vpx_memcpy (&cm->left_context,
+                    &cpi->left_context[i>>1],
+                    sizeof(ENTROPY_CONTEXT_PLANES));
+
+        map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+        x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+        // reset above block coeffs
+        xd->above_context = cm->above_context + mb_col;
+
+        // Set up distance of MB to edge of the frame in 1/8th pel units
+        xd->mb_to_top_edge    = -((mb_row * 16) << 3);
+        xd->mb_to_left_edge   = -((mb_col * 16) << 3);
+        xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+        xd->mb_to_right_edge  = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+
+        // Set up limit values for MV components to prevent them from
+        // extending beyond the UMV borders assuming 16x16 block size
+        x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+                            + (VP8BORDERINPIXELS - 16);
         x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                             + (VP8BORDERINPIXELS - 16);
 
+#if CONFIG_SUPERBLOCKS
+        // Set up limit values for MV components to prevent them from
+        // extending beyond the UMV borders assuming 32x32 block size
+        x->mv_row_min_sb = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_row_max_sb = ((cm->mb_rows - 1 - mb_row) * 16)
+                            + (VP8BORDERINPIXELS - 32);
+        x->mv_col_min_sb = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_col_max_sb = ((cm->mb_cols - 1 - mb_col) * 16)
+                            + (VP8BORDERINPIXELS - 32);
+#endif
+
+        xd->up_available = (mb_row != 0);
+        xd->left_available = (mb_col != 0);
+
+        recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+        recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
         xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
         xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
-        xd->left_available = (mb_col != 0);
 
-        x->rddiv = cpi->RDDIV;
-        x->rdmult = cpi->RDMULT;
-
-        //Copy current mb to a buffer
-        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+        // Copy current MB to a work buffer
+        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer,
+                                                  x->src.y_stride,
+                                                  x->thismb, 16);
 
         if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
             vp8_activity_masking(cpi, x);
@@ -462,8 +796,9 @@ void encode_mb_row(VP8_COMP *cpi,
         if (xd->segmentation_enabled)
         {
             // Code to set segment id in xd->mbmi.segment_id
-            if (cpi->segmentation_map[map_index+mb_col] <= 3)
-                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col];
+            if (cpi->segmentation_map[map_index] <= 3)
+                xd->mode_info_context->mbmi.segment_id =
+                              cpi->segmentation_map[map_index];
             else
                 xd->mode_info_context->mbmi.segment_id = 0;
 
@@ -473,14 +808,13 @@ void encode_mb_row(VP8_COMP *cpi,
             // Set to Segment 0 by default
             xd->mode_info_context->mbmi.segment_id = 0;
 
-        x->active_ptr = cpi->active_map + map_index + mb_col;
+        x->active_ptr = cpi->active_map + map_index;
 
-        /* force 4x4 transform for mode selection */
-        xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+        cpi->update_context = 0;
 
         if (cm->frame_type == KEY_FRAME)
         {
-            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
+            vp8cx_encode_intra_macro_block(cpi, x, tp, 1);
             //Note the encoder may have changed the segment_id
 
 #ifdef MODE_STATS
@@ -489,7 +823,8 @@ void encode_mb_row(VP8_COMP *cpi,
         }
         else
         {
-            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);
+            vp8cx_encode_inter_macroblock(cpi, x, tp,
+                                         recon_yoffset, recon_uvoffset, 1);
             //Note the encoder may have changed the segment_id
 
 #ifdef MODE_STATS
@@ -502,63 +837,148 @@ void encode_mb_row(VP8_COMP *cpi,
                 for (b = 0; b < x->partition_info->count; b++)
                 {
                     inter_b_modes[x->partition_info->bmi[b].mode] ++;
-                }
+               }
             }
 
 #endif
 
             // Count of last ref frame 0,0 usage
-            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
+            if ((xd->mode_info_context->mbmi.mode == ZEROMV) &&
+                (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                 cpi->inter_zz_count ++;
         }
 
+        // TODO Partitioning is broken!
         cpi->tplist[mb_row].stop = *tp;
 
-        // Increment pointer into gf usage flags structure.
-        x->gf_active_ptr++;
-
-        // Increment the activity mask pointers.
-        x->mb_activity_ptr++;
+        // Copy back updated left context
+        vpx_memcpy (&cpi->left_context[i>>1],
+                    &cm->left_context,
+                    sizeof(ENTROPY_CONTEXT_PLANES));
 
-        // adjust to the next column of macroblocks
-        x->src.y_buffer += 16;
-        x->src.u_buffer += 8;
-        x->src.v_buffer += 8;
+        // Next MB
+        mb_row += dy;
+        mb_col += dx;
 
-        recon_yoffset += 16;
-        recon_uvoffset += 8;
+        x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+        x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+        x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
 
-        // skip to next mb
-        xd->mode_info_context++;
+        x->gf_active_ptr      += offset_unextended;
+        x->partition_info     += offset_extended;
+        xd->mode_info_context += offset_extended;
+        xd->prev_mode_info_context += offset_extended;
 
-        xd->prev_mode_info_context++;
-        assert((xd->prev_mode_info_context - cpi->common.prev_mip)
-            ==(xd->mode_info_context - cpi->common.mip));
-        x->partition_info++;
-
-        xd->above_context++;
+#if CONFIG_DEBUG
+        assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+               (xd->mode_info_context - cpi->common.mip));
+#endif
     }
 
-    //extend the recon for intra prediction
-    vp8_extend_mb_row(
-        &cm->yv12_fb[dst_fb_idx],
-        xd->dst.y_buffer + 16,
-        xd->dst.u_buffer + 8,
-        xd->dst.v_buffer + 8);
+    // debug output
+#if DBG_PRNT_SEGMAP
+        {
+            FILE *statsfile;
+            statsfile = fopen("segmap2.stt", "a");
+            fprintf(statsfile, "\n" );
+            fclose(statsfile);
+        }
+    #endif
+}
 
-    // this is to account for the border
-    xd->prev_mode_info_context++;
-    xd->mode_info_context++;
-    x->partition_info++;
+static
+void encode_sb_row ( VP8_COMP *cpi,
+                     VP8_COMMON *cm,
+                     int mb_row,
+                     MACROBLOCK  *x,
+                     MACROBLOCKD *xd,
+                     TOKENEXTRA **tp,
+                     int *totalrate )
+{
+    int mb_col;
+    int mb_cols = cm->mb_cols;
 
-// debug output
-#if DBG_PRNT_SEGMAP
+    // Initialize the left context for the new SB row
+    vpx_memset (cpi->left_context, 0, sizeof(cpi->left_context));
+    vpx_memset (&cm->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    // Code each SB in the row
+    for (mb_col=0; mb_col<mb_cols; mb_col+=2)
     {
-        FILE *statsfile;
-        statsfile = fopen("segmap2.stt", "a");
-        fprintf(statsfile, "\n" );
-        fclose(statsfile);
+        int mb_rate = 0;
+#if CONFIG_SUPERBLOCKS
+        int sb_rate = INT_MAX;
+#endif
+
+#if CONFIG_DEBUG
+        MODE_INFO *mic = xd->mode_info_context;
+        PARTITION_INFO *pi = x->partition_info;
+        signed char  *gfa = x->gf_active_ptr;
+        unsigned char *yb = x->src.y_buffer;
+        unsigned char *ub = x->src.u_buffer;
+        unsigned char *vb = x->src.v_buffer;
+#endif
+
+        // Pick modes assuming the SB is coded as 4 independent MBs
+        pick_mb_modes (cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate);
+
+        x->src.y_buffer -= 32;
+        x->src.u_buffer -= 16;
+        x->src.v_buffer -= 16;
+
+        x->gf_active_ptr -= 2;
+        x->partition_info -= 2;
+        xd->mode_info_context -= 2;
+        xd->prev_mode_info_context -= 2;
+
+#if CONFIG_DEBUG
+        assert (x->gf_active_ptr == gfa);
+        assert (x->partition_info == pi);
+        assert (xd->mode_info_context == mic);
+        assert (x->src.y_buffer == yb);
+        assert (x->src.u_buffer == ub);
+        assert (x->src.v_buffer == vb);
+#endif
+
+#if CONFIG_SUPERBLOCKS
+        // Pick a mode assuming that it applies to all 4 of the MBs in the SB
+        pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, &sb_rate);
+
+        // Decide whether to encode as a SB or 4xMBs
+        if(sb_rate < mb_rate)
+        {
+            x->encode_as_sb = 1;
+            *totalrate += sb_rate;
+        }
+        else
+#endif
+        {
+            x->encode_as_sb = 0;
+            *totalrate += mb_rate;
+        }
+
+        // Encode SB using best computed mode(s)
+        encode_sb (cpi, cm, mb_row, mb_col, x, xd, tp);
+
+#if CONFIG_DEBUG
+        assert (x->gf_active_ptr == gfa+2);
+        assert (x->partition_info == pi+2);
+        assert (xd->mode_info_context == mic+2);
+        assert (x->src.y_buffer == yb+32);
+        assert (x->src.u_buffer == ub+16);
+        assert (x->src.v_buffer == vb+16);
+#endif
     }
+
+    // this is to account for the border
+    x->gf_active_ptr += mb_cols - (mb_cols & 0x1);
+    x->partition_info += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+    xd->mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+    xd->prev_mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+
+#if CONFIG_DEBUG
+    assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+           (xd->mode_info_context - cpi->common.mip));
 #endif
 }
 
@@ -574,8 +994,6 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
     // Activity map pointer
     x->mb_activity_ptr = cpi->mb_activity_map;
 
-    x->vector_range = 32;
-
     x->act_zbin_adj = 0;
 
     x->partition_info = x->pi;
@@ -593,7 +1011,7 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
     if (cm->frame_type == KEY_FRAME)
         vp8_init_mbmode_probs(cm);
 
-    // Copy data over into macro block data sturctures.
+    // Copy data over into macro block data structures.
     x->src = * cpi->Source;
     xd->pre = cm->yv12_fb[cm->lst_fb_idx];
     xd->dst = cm->yv12_fb[cm->new_fb_idx];
@@ -640,8 +1058,8 @@ static void encode_frame_internal(VP8_COMP *cpi)
     int totalrate;
 
     // Compute a modified set of reference frame probabilities to use when
-    // prediction fails. These are based on the current genreal estimates for
-    // this frame which may be updated with each itteration of the recode loop.
+    // prediction fails. These are based on the current general estimates for
+    // this frame which may be updated with each iteration of the recode loop.
     compute_mod_refprobs( cm );
 
 // debug output
@@ -740,7 +1158,6 @@ static void encode_frame_internal(VP8_COMP *cpi)
 #endif
 
     xd->mode_info_context = cm->mi;
-
     xd->prev_mode_info_context = cm->prev_mi;
 
     vp8_zero(cpi->MVcount);
@@ -775,19 +1192,20 @@ static void encode_frame_internal(VP8_COMP *cpi)
         vpx_usec_timer_start(&emr_timer);
 
         {
-            // for each macroblock row in the image
-            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+            // For each row of SBs in the frame
+            for (mb_row = 0; mb_row < cm->mb_rows; mb_row+=2)
             {
-                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate);
+                int offset = (cm->mb_cols+1) & ~0x1;
 
-                // adjust to the next row of MBs
-                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
-                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
-                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
+                encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate);
+
+                // adjust to the next row of SBs
+                x->src.y_buffer += 32 * x->src.y_stride - 16 * offset;
+                x->src.u_buffer += 16 * x->src.uv_stride - 8 * offset;
+                x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset;
             }
 
             cpi->tok_count = tp - cpi->tok;
-
         }
 
         vpx_usec_timer_mark(&emr_timer);
@@ -795,8 +1213,9 @@ static void encode_frame_internal(VP8_COMP *cpi)
 
     }
 
-    // 256 rate units to the bit
-    cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES
+    // 256 rate units to the bit,
+    // projected_frame_size in units of BYTES
+    cpi->projected_frame_size = totalrate >> 8;
 
     // Make a note of the percentage MBs coded Intra.
     if (cm->frame_type == KEY_FRAME)
@@ -813,7 +1232,8 @@ static void encode_frame_internal(VP8_COMP *cpi)
                     + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
 
         if (tot_modes)
-            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
+            cpi->this_frame_percent_intra =
+                   cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
 
     }
 
@@ -1114,18 +1534,12 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
 #endif
 }
 
-int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
+void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
+                                   MACROBLOCK *x,
+                                   TOKENEXTRA **t,
+                                   int output_enabled)
 {
-    int rate, i;
-    int mb_skip_context;
-
-    // Non rd path deprecated in test code base
-    //if (cpi->sf.RD && cpi->compressor_speed != 2)
-    vp8_rd_pick_intra_mode(cpi, x, &rate);
-    //else
-    //   vp8_pick_intra_mode(cpi, x, &rate);
-
-    if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+    if((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled)
     {
         adjust_act_zbin( cpi, x );
         vp8_update_zbin_extra(cpi, x);
@@ -1157,9 +1571,13 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
 
     if(x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED)
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
-    sum_intra_stats(cpi, x);
-    vp8_tokenize_mb(cpi, &x->e_mbd, t);
-    return rate;
+
+    if (output_enabled)
+    {
+        // Tokenize
+        sum_intra_stats(cpi, x);
+        vp8_tokenize_mb(cpi, &x->e_mbd, t);
+    }
 }
 #ifdef SPEEDSTATS
 extern int cnt_pm;
@@ -1167,10 +1585,11 @@ extern int cnt_pm;
 
 extern void vp8_fix_contexts(MACROBLOCKD *x);
 
-int vp8cx_encode_inter_macroblock
+void vp8cx_encode_inter_macroblock
 (
     VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
-    int recon_yoffset, int recon_uvoffset
+    int recon_yoffset, int recon_uvoffset,
+    int output_enabled
 )
 {
     VP8_COMMON *cm = &cpi->common;
@@ -1184,64 +1603,6 @@ int vp8cx_encode_inter_macroblock
 
     x->skip = 0;
 
-    if (xd->segmentation_enabled)
-        x->encode_breakout = cpi->segment_encode_breakout[*segment_id];
-    else
-        x->encode_breakout = cpi->oxcf.encode_breakout;
-
-    //if (cpi->sf.RD)
-    // For now this codebase is limited to a single rd encode path
-    {
-        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
-        int single, compound, hybrid;
-
-        vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                               &distortion, &intra_error, &single, &compound, &hybrid);
-
-        cpi->rd_single_diff += single;
-        cpi->rd_comp_diff   += compound;
-        cpi->rd_hybrid_diff += hybrid;
-        if (x->e_mbd.mode_info_context->mbmi.ref_frame &&
-            x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
-        {
-            unsigned char pred_context;
-
-            pred_context = get_pred_context( cm, xd, PRED_COMP );
-
-            if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME)
-                cpi->single_pred_count[pred_context]++;
-            else
-                cpi->comp_pred_count[pred_context]++;
-        }
-
-
-        /* test code: set transform size based on mode selection */
-        if( cpi->common.txfm_mode == ALLOW_8X8
-            && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
-            && x->e_mbd.mode_info_context->mbmi.mode != B_PRED
-            && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
-        {
-            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
-            cpi->t8x8_count ++;
-        }
-        else
-        {
-            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
-            cpi->t4x4_count++;
-        }
-
-        /* restore cpi->zbin_mode_boost_enabled */
-        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
-
-    }
-    //else
-    // The non rd encode path has been deleted from this code base
-    // to simplify development
-    //    vp8_pick_inter_mode
-
-    cpi->prediction_error += distortion;
-    cpi->intra_error += intra_error;
-
     if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
     {
         // Adjust the zbin based on this MB rate.
@@ -1250,7 +1611,7 @@ int vp8cx_encode_inter_macroblock
 
     {
         // Experimental code. Special case for gf and arf zeromv modes.
-        // Increase zbin size to supress noise
+        // Increase zbin size to suppress noise
         cpi->zbin_mode_boost = 0;
         if (cpi->zbin_mode_boost_enabled)
         {
@@ -1282,6 +1643,21 @@ int vp8cx_encode_inter_macroblock
                            get_pred_ref( cm, xd )) );
     set_pred_flag( xd, PRED_REF, ref_pred_flag );
 
+    /* test code: set transform size based on mode selection */
+    if( cpi->common.txfm_mode == ALLOW_8X8
+        && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+        cpi->t8x8_count ++;
+    }
+    else
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+        cpi->t4x4_count++;
+    }
+
     // If we have just a single reference frame coded for a segment then
     // exclude from the reference frame counts used to work out
     // probabilities. NOTE: At the moment we dont support custom trees
@@ -1323,7 +1699,9 @@ int vp8cx_encode_inter_macroblock
             vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
         }
-        sum_intra_stats(cpi, x);
+
+        if (output_enabled)
+            sum_intra_stats(cpi, x);
     }
     else
     {
@@ -1394,7 +1772,8 @@ int vp8cx_encode_inter_macroblock
             fflush(stdout);
         }
 #endif
-        vp8_tokenize_mb(cpi, xd, t);
+        if (output_enabled)
+            vp8_tokenize_mb(cpi, xd, t);
 #ifdef ENC_DEBUG
         if (enc_debug) {
           printf("Tokenized\n");
@@ -1432,5 +1811,4 @@ int vp8cx_encode_inter_macroblock
 #endif
         }
     }
-    return rate;
 }
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index b24f9b4a9324449682f3548f6cd878d7a7d2ef98..16c4b35cbfc67c569f5b7c0a7b1e03020a184e26 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -101,7 +101,10 @@ void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
     int i;
 
     MACROBLOCKD *x = &mb->e_mbd;
+#if 0
+    // Intra modes requiring top-right MB reconstructed data have been disabled
     vp8_intra_prediction_down_copy(x);
+#endif
 
     for (i = 0; i < 16; i++)
         vp8_encode_intra4x4block(rtcd, mb, i);
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index d63a1f92342380dbe6a74a2b430e3c047de13a8c..b561ea3380e06b85466f2f5fb536cf678a9988a9 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -911,7 +911,7 @@ static double calc_correction_factor( double err_per_mb,
 // PGW TODO..
 // This code removes direct dependency on QIndex to determin the range
 // (now uses the actual quantizer) but has not been tuned.
-static double adjust_maxq_qrange(VP8_COMP *cpi)
+static void adjust_maxq_qrange(VP8_COMP *cpi)
 {
     int i;
     double q;
diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c
index 8695626623c0d34982a65968ceeb65a3e972b431..3089be89360a221f7f2d76cddacfb911a8280705 100644
--- a/vp8/encoder/mbgraph.c
+++ b/vp8/encoder/mbgraph.c
@@ -109,7 +109,7 @@ static unsigned int do_16x16_motion_iteration
     //VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
     best_err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                     (xd->dst.y_buffer, xd->dst.y_stride,
-                     xd->predictor, 16, &best_err);
+                     xd->predictor, 16, best_err);
 
     /* restore UMV window */
     x->mv_col_min = tmp_col_min;
@@ -158,7 +158,7 @@ static int do_16x16_motion_search
     err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                     (ref->y_buffer + mb_y_offset,
                      ref->y_stride, xd->dst.y_buffer,
-                     xd->dst.y_stride, &err);
+                     xd->dst.y_stride, INT_MAX);
     dst_mv->as_int = 0;
 
     // Test last reference frame using the previous best mv as the
@@ -224,7 +224,7 @@ static int do_16x16_zerozero_search
     err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                     (ref->y_buffer + mb_y_offset,
                      ref->y_stride, xd->dst.y_buffer,
-                     xd->dst.y_stride, &err);
+                     xd->dst.y_stride, INT_MAX);
 
     dst_mv->as_int = 0;
 
@@ -255,7 +255,7 @@ static int find_best_16x16_intra
         err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                         (xd->predictor, 16,
                          buf->y_buffer + mb_y_offset,
-                         buf->y_stride, &err);
+                         buf->y_stride, err);
         // find best
         if (err < best_err)
         {
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index f07d18b49e5df1a4be962f4890d8ec757d1dc29c..c9058ae406d488f81687bdd773eb0de9469c6748 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1257,7 +1257,6 @@ int vp8_hex_search
     int k = -1;
     int all_in;
     int best_site = -1;
-    MACROBLOCKD *xd = &x->e_mbd;
 
     int_mv fcenter_mv;
     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 021c450be49653e9851dd4a14079eaaa2255989e..fb13c867b9454d96c318208205c89db577d85cee 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -205,8 +205,7 @@ int calculate_minq_index( double maxq,
         if ( minqtarget <= vp8_convert_qindex_to_q(i) )
             return i;
     }
-    if ( i == QINDEX_RANGE )
-        return QINDEX_RANGE-1;
+    return QINDEX_RANGE-1;
 }
 void init_minq_luts()
 {
@@ -408,7 +407,6 @@ static int compute_qdelta( VP8_COMP *cpi, double qstart, double qtarget )
     int i;
     int start_index = cpi->worst_quality;
     int target_index = cpi->worst_quality;
-    int retval = 0;
 
     // Convert the average q value to an index.
     for ( i = cpi->best_quality; i < cpi->worst_quality; i++ )
@@ -590,7 +588,7 @@ static void init_seg_features(VP8_COMP *cpi)
         // All other frames.
         else
         {
-            // No updeates.. leave things as they are.
+            // No updates.. leave things as they are.
             xd->update_mb_segmentation_map = 0;
             xd->update_mb_segmentation_data = 0;
         }
@@ -665,8 +663,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
         cpi->mode_chosen_counts[i] = 0;
     }
 
-    cpi->mbs_tested_so_far = 0;
-
     // best quality defaults
     sf->RD = 1;
     sf->search_method = NSTEP;
@@ -2500,6 +2496,30 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
     vp8_set_quantizer(cpi, find_fp_qindex());
     vp8_first_pass(cpi);
 }
+
+#if 1
+void write_yuv_frame_to_file(YV12_BUFFER_CONFIG *frame)
+{
+    // Debug aid: append the frame's Y, U and V planes to encode_recon.yuv.
+    // Opened in binary mode so raw sample bytes are never newline-translated.
+    int i;
+    FILE *fp = fopen("encode_recon.yuv", "ab");
+
+    if (!fp)    // best-effort dump: nothing to write if the open fails
+        return;
+
+    for (i = 0; i < frame->y_height; i++)
+        fwrite(frame->y_buffer + i * frame->y_stride,
+            frame->y_width, 1, fp);
+    for (i = 0; i < frame->uv_height; i++)
+        fwrite(frame->u_buffer + i * frame->uv_stride,
+            frame->uv_width, 1, fp);
+    for (i = 0; i < frame->uv_height; i++)
+        fwrite(frame->v_buffer + i * frame->uv_stride,
+            frame->uv_width, 1, fp);
+    fclose(fp);
+}
+#endif
+
 //#define WRITE_RECON_BUFFER 1
 #if WRITE_RECON_BUFFER
 void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
@@ -2567,7 +2587,7 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame)
     return (double)num_edge_pels/(double)num_pels;
 }
 
-// Function to test for conditions that indeicate we should loop
+// Function to test for conditions that indicate we should loop
 // back and recode a frame.
 static BOOL recode_loop_test( VP8_COMP *cpi,
                               int high_limit, int low_limit,
@@ -3563,9 +3583,12 @@ static void encode_frame_to_data_rate
         loopfilter_frame(cpi, cm);
     }
 
+    if(cm->show_frame)
+        write_yuv_frame_to_file(cm->frame_to_show);
+
     update_reference_frames(cm);
 
-    // Work out the segment probabilites if segmentation is enabled and
+    // Work out the segment probabilities if segmentation is enabled and
     // the map is due to be updated
     if (xd->segmentation_enabled && xd->update_mb_segmentation_map)
     {
@@ -3935,22 +3958,25 @@ static void check_gf_quality(VP8_COMP *cpi)
             // Low use of gf
             if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15))
             {
-                // ...but last frame zero zero usage is reasonbable so a new gf might be appropriate
+                // ...but last frame zero zero usage is reasonable
+                // so a new gf might be appropriate
                 if (last_ref_zz_useage >= 25)
                 {
                     cpi->gf_bad_count ++;
 
-                    if (cpi->gf_bad_count >= 8)   // Check that the condition is stable
+                    // Check that the condition is stable
+                    if (cpi->gf_bad_count >= 8)
                     {
                         cpi->gf_update_recommended = 1;
                         cpi->gf_bad_count = 0;
                     }
                 }
                 else
-                    cpi->gf_bad_count = 0;        // Restart count as the background is not stable enough
+                    cpi->gf_bad_count = 0;  // Restart count as the background
+                                            // is not stable enough
             }
             else
-                cpi->gf_bad_count = 0;            // Gf useage has picked up so reset count
+                cpi->gf_bad_count = 0;  // Gf usage has picked up so reset count
         }
     }
     // If the signal is set but has not been read should we cancel it.
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index ed3a8823439e99b9cba0dd4a397a9d234cf5f3e2..807f0a5bc85350ddd8c9a9d9af6613b02e402c95 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -348,7 +348,6 @@ typedef struct VP8_COMP
     unsigned int mode_check_freq[MAX_MODES];
     unsigned int mode_test_hit_counts[MAX_MODES];
     unsigned int mode_chosen_counts[MAX_MODES];
-    unsigned int mbs_tested_so_far;
 
     int rd_thresh_mult[MAX_MODES];
     int rd_baseline_thresh[MAX_MODES];
@@ -642,9 +641,17 @@ typedef struct VP8_COMP
     int *lf_ref_frame_sign_bias;
     int *lf_ref_frame;
 
-    int force_next_frame_intra; /* force next frame to intra when kf_auto says so */
+    /* force next frame to intra when kf_auto says so */
+    int force_next_frame_intra;
 
     int droppable;
+
+    // Global store for SB left contexts, one for each MB row in the SB
+    ENTROPY_CONTEXT_PLANES left_context[2];
+
+    // TODO Do we still need this??
+    int update_context;
+
 } VP8_COMP;
 
 void control_data_rate(VP8_COMP *cpi);
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index c798a7ee6f97a38033d9d45750a1b3840fbec688..c46be62e53acc03f7e519f04bf99ccf1039a4d80 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -874,6 +874,12 @@ static int rd_pick_intra4x4block(
         int this_rd;
         int ratey;
 
+        // TODO Temporarily ignore modes that need the above-right data. SB
+        // encoding means this data is not available for the bottom right MB.
+        // Do we need to do this for mode2 also?
+        if (mode==B_LD_PRED || mode==B_VL_PRED)
+            continue;
+
         rate = bmode_costs[mode];
 
 #if CONFIG_COMP_INTRA_PRED
@@ -936,10 +942,10 @@ static int rd_pick_intra4x4block(
 
 static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
                                      int *rate_y, int *Distortion, int best_rd,
-                                     int allow_comp)
+                                     int allow_comp, int update_contexts)
 {
-    MACROBLOCKD *const xd = &mb->e_mbd;
     int i;
+    MACROBLOCKD *const xd = &mb->e_mbd;
     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
     int distortion = 0;
     int tot_rate_y = 0;
@@ -949,13 +955,25 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
     ENTROPY_CONTEXT *tl;
     unsigned int *bmode_costs;
 
-    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    if (update_contexts)
+    {
+        ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
+        tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
+    }
+    else
+    {
+        vpx_memcpy(&t_above, mb->e_mbd.above_context,
+                   sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memcpy(&t_left, mb->e_mbd.left_context,
+                   sizeof(ENTROPY_CONTEXT_PLANES));
 
-    ta = (ENTROPY_CONTEXT *)&t_above;
-    tl = (ENTROPY_CONTEXT *)&t_left;
+        ta = (ENTROPY_CONTEXT *)&t_above;
+        tl = (ENTROPY_CONTEXT *)&t_left;
+    }
 
+#if 0
     vp8_intra_prediction_down_copy(xd);
+#endif
 
     bmode_costs = mb->inter_bmode_costs;
 
@@ -2348,7 +2366,7 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
     }
 }
 
-static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
+/*static */void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
 {
     if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
     {
@@ -2445,7 +2463,7 @@ void vp8_estimate_ref_frame_costs(VP8_COMP *cpi, unsigned int * ref_costs )
     // Get the context probability for the prediction flag
     pred_prob = get_pred_prob( cm, xd, PRED_REF );
 
-    // Get the set of probailities to use if prediction fails
+    // Get the set of probabilities to use if prediction fails
     mod_refprobs = cm->mod_refprobs[pred_ref];
 
     // For each possible selected reference frame work out a cost.
@@ -2459,7 +2477,7 @@ void vp8_estimate_ref_frame_costs(VP8_COMP *cpi, unsigned int * ref_costs )
         // Get the prediction for the current mb
         cost = vp8_cost_bit( pred_prob, pred_flag );
 
-        // for incorectly predicted cases
+        // for incorrectly predicted cases
         if ( ! pred_flag )
         {
             if ( mod_refprobs[0] )
@@ -2503,6 +2521,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     int best_mode_index = 0;
     int mode8x8[2][4];
     unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
+    int mb_index = xd->mb_index;
 
     int i;
     int mode_index;
@@ -2549,6 +2568,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
     vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
+    vpx_memset(&x->mb_context[mb_index], 0, sizeof(PICK_MODE_CONTEXT));
 
     for (i = 0; i < 4; i++)
     {
@@ -2599,7 +2619,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     }
 
     *returnintra = INT_MAX;
-    cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
 
     x->skip = 0;
 
@@ -2647,8 +2666,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         distortion2 = 0;
 
         this_mode = vp8_mode_order[mode_index];
-
         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+
 #if CONFIG_COMP_INTRA_PRED
         x->e_mbd.mode_info_context->mbmi.second_mode = (MB_PREDICTION_MODE) (DC_PRED - 1);
         x->e_mbd.mode_info_context->mbmi.second_uv_mode = (MB_PREDICTION_MODE) (DC_PRED - 1);
@@ -2736,7 +2755,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             int tmp_rd;
 
             // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
-            tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd, 0);
+            tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd, 0, 0);
             rate2 += rate;
             distortion2 += distortion;
 
@@ -3049,7 +3068,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                 if (sse < threshold)
                 {
                      unsigned int q2dc = xd->block[24].dequant[0];
-                    /* If theres is no codeable 2nd order dc
+                    /* If there is no codeable 2nd order dc
                        or a very small uniform pixel change change */
                     if ((sse - var < q2dc * q2dc >>4) ||
                         (sse /2 > var && sse-var < 64))
@@ -3222,7 +3241,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
         // Where skip is allowable add in the default per mb cost for the no skip case.
         // where we then decide to skip we have to delete this and replace it with the
-        // cost of signallying a skip
+        // cost of signaling a skip
         if (cpi->common.mb_no_coeff_skip)
         {
 #if CONFIG_NEWENTROPY
@@ -3329,7 +3348,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                 best_hybrid_rd = this_rd;
         }
 
-        // Did this mode help.. i.i is it the new best mode
+        // Did this mode help.. i.e. is it the new best mode
         if (this_rd < best_rd || x->skip)
         {
             if (!mode_excluded)
@@ -3454,9 +3473,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     }
 
-    // Note how often each mode chosen as best
-    cpi->mode_chosen_counts[best_mode_index] ++;
-
     // This code force Altref,0,0 and skip for the frame that overlays a
     // an alrtef unless Altref is filtered. However, this is unsafe if
     // segment level coding of ref frame or mode is enabled for this
@@ -3480,7 +3496,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         return;
     }
 
-
     // macroblock modes
     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
 
@@ -3509,8 +3524,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                                       x->partition_info->bmi[15].mv.as_int;
     }
 
-    rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
-
     if (best_single_rd == INT_MAX)
         *best_single_rd_diff = INT_MIN;
     else
@@ -3523,9 +3536,22 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         *best_hybrid_rd_diff = INT_MIN;
     else
         *best_hybrid_rd_diff = best_rd - best_hybrid_rd;
+
+    // Take a snapshot of the coding context so it can be
+    // restored if we decide to encode this way
+    x->mb_context[mb_index].best_mode_index = best_mode_index;
+    vpx_memcpy(&x->mb_context[mb_index].mic, x->e_mbd.mode_info_context,
+               sizeof(MODE_INFO));
+    vpx_memcpy(&x->mb_context[mb_index].partition_info, &best_partition,
+               sizeof(PARTITION_INFO));
+    vpx_memcpy(&x->mb_context[mb_index].best_ref_mv,
+               &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+               sizeof(int_mv));
+    //x->mb_context[mb_index].rddiv = x->rddiv;
+    //x->mb_context[mb_index].rdmult = x->rdmult;
 }
 
-void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
+int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x)
 {
     MACROBLOCKD *xd = &x->e_mbd;
     int error4x4, error16x16, error4x4d;
@@ -3540,7 +3566,7 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
     int mode16x16;
     int mode8x8[2][4];
 
-    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+    xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
     rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
     rate = rateuv;
@@ -3548,28 +3574,28 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
     error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
                                             &rate16x16, &rate16x16_tokenonly,
                                             &dist16x16);
-    mode16x16 = x->e_mbd.mode_info_context->mbmi.mode;
+    mode16x16 = xd->mode_info_context->mbmi.mode;
 
     error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
                 &rate8x8, &rate8x8_tokenonly,
                 &dist8x8, error16x16);
-    mode8x8[0][0]= x->e_mbd.mode_info_context->bmi[0].as_mode.first;
-    mode8x8[0][1]= x->e_mbd.mode_info_context->bmi[2].as_mode.first;
-    mode8x8[0][2]= x->e_mbd.mode_info_context->bmi[8].as_mode.first;
-    mode8x8[0][3]= x->e_mbd.mode_info_context->bmi[10].as_mode.first;
+    mode8x8[0][0]= xd->mode_info_context->bmi[0].as_mode.first;
+    mode8x8[0][1]= xd->mode_info_context->bmi[2].as_mode.first;
+    mode8x8[0][2]= xd->mode_info_context->bmi[8].as_mode.first;
+    mode8x8[0][3]= xd->mode_info_context->bmi[10].as_mode.first;
 #if CONFIG_COMP_INTRA_PRED
-    mode8x8[1][0]= x->e_mbd.mode_info_context->bmi[0].as_mode.second;
-    mode8x8[1][1]= x->e_mbd.mode_info_context->bmi[2].as_mode.second;
-    mode8x8[1][2]= x->e_mbd.mode_info_context->bmi[8].as_mode.second;
-    mode8x8[1][3]= x->e_mbd.mode_info_context->bmi[10].as_mode.second;
+    mode8x8[1][0]= xd->mode_info_context->bmi[0].as_mode.second;
+    mode8x8[1][1]= xd->mode_info_context->bmi[2].as_mode.second;
+    mode8x8[1][2]= xd->mode_info_context->bmi[8].as_mode.second;
+    mode8x8[1][3]= xd->mode_info_context->bmi[10].as_mode.second;
 #endif
 
     error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
                                          &rate4x4, &rate4x4_tokenonly,
-                                         &dist4x4, error16x16, 0);
+                                         &dist4x4, error16x16, 0, 0);
     error4x4d = rd_pick_intra4x4mby_modes(cpi, x,
                                          &rate4x4d, &rate4x4_tokenonly,
-                                         &dist4x4d, error16x16, 1);
+                                         &dist4x4d, error16x16, 1, 0);
 
     if(error8x8> error16x16)
     {
@@ -3579,12 +3605,13 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
             if (error4x4d >= error4x4) // FIXME save original modes etc.
                 error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4,
                                                      &rate4x4_tokenonly,
-                                                     &dist4x4, error16x16, 0);
-            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+                                                     &dist4x4, error16x16, 0,
+                                                     cpi->update_context);
+            xd->mode_info_context->mbmi.mode = B_PRED;
         }
         else
         {
-            x->e_mbd.mode_info_context->mbmi.mode = mode16x16;
+            xd->mode_info_context->mbmi.mode = mode16x16;
             rate += rate16x16;
 
         }
@@ -3597,16 +3624,95 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
             if (error4x4d >= error4x4) // FIXME save original modes etc.
                 error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4,
                                                      &rate4x4_tokenonly,
-                                                     &dist4x4, error16x16, 0);
-            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+                                                     &dist4x4, error16x16, 0,
+                                                     cpi->update_context);
+            xd->mode_info_context->mbmi.mode = B_PRED;
         }
         else
         {
 
-            x->e_mbd.mode_info_context->mbmi.mode = I8X8_PRED;
+            xd->mode_info_context->mbmi.mode = I8X8_PRED;
             set_i8x8_block_modes(x, mode8x8);
             rate += rate8x8;
         }
     }
-    *rate_ = rate;
+    return rate;
+}
+
+int vp8cx_pick_mode_inter_macroblock
+(
+    VP8_COMP *cpi, MACROBLOCK *x,
+    int recon_yoffset, int recon_uvoffset
+)
+{   // Runs vp8_rd_pick_inter_mode() on x, updates cpi stats, returns the rate
+    VP8_COMMON *cm = &cpi->common;
+    MACROBLOCKD *const xd = &x->e_mbd;
+    int rate;        // written via &rate by vp8_rd_pick_inter_mode()
+    int distortion;  // written via &distortion by vp8_rd_pick_inter_mode()
+    int intra_error = 0;
+    unsigned char *segment_id = &xd->mode_info_context->mbmi.segment_id;
+#if CONFIG_COMPRED
+     unsigned char ref_pred_flag;  // NOTE(review): not referenced in this function
+#endif
+
+    if (xd->segmentation_enabled)  // per-segment breakout replaces the global one
+        x->encode_breakout = cpi->segment_encode_breakout[*segment_id];
+    else
+        x->encode_breakout = cpi->oxcf.encode_breakout;
+
+    //if (cpi->sf.RD)
+    // For now this codebase is limited to a single rd encode path
+    {
+        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;  // saved, restored below
+        int single, compound, hybrid;  // rd diffs reported by the mode search
+
+        vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
+                               &distortion, &intra_error, &single, &compound,
+                               &hybrid);
+
+        // TODO: Save these and add them in only if this MB coding mode is selected?
+        cpi->rd_single_diff += single;
+        cpi->rd_comp_diff   += compound;
+        cpi->rd_hybrid_diff += hybrid;
+        if (xd->mode_info_context->mbmi.ref_frame &&
+            xd->mode_info_context->mbmi.mode != SPLITMV)
+        {
+            unsigned char pred_context;  // index into the single/comp pred counters
+
+            pred_context = get_pred_context( cm, xd, PRED_COMP );
+
+            if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME)
+                cpi->single_pred_count[pred_context]++;
+            else
+                cpi->comp_pred_count[pred_context]++;
+        }
+
+        /* test code: set transform size based on mode selection */
+        if( cpi->common.txfm_mode == ALLOW_8X8
+            && xd->mode_info_context->mbmi.mode != I8X8_PRED
+            && xd->mode_info_context->mbmi.mode != B_PRED
+            && xd->mode_info_context->mbmi.mode != SPLITMV)
+        {
+            xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+            cpi->t8x8_count ++;
+        }
+        else
+        {
+            xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+            cpi->t4x4_count++;
+        }
+
+        /* restore cpi->zbin_mode_boost_enabled to the value saved before the search */
+        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
+    }
+    //else
+    // The non rd encode path has been deleted from this code base
+    // to simplify development
+    //    vp8_pick_inter_mode
+
+    // Store metrics so they can be added in to totals if this mode is picked
+    x->mb_context[xd->mb_index].distortion  = distortion;
+    x->mb_context[xd->mb_index].intra_error = intra_error;
+
+    return rate;
 }
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index 9bb7e404bb3a2354a252955a8fc4ee5dde25d3fa..a7de498244dbb2e2d81e6321b45c847f365af38b 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -19,7 +19,7 @@ extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
 extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset,
                                    int *returnrate, int *returndistortion, int *returnintra,
                                    int *best_single_rd_diff, int *best_comp_rd_diff, int *best_hybrid_rd_diff);
-extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
+extern int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x);
 
 extern void vp8_mv_pred
 (
diff --git a/vpxenc.c b/vpxenc.c
index 094558da868248a3112c3c4c4bd7aa1059238623..489b30dc4e7ee28c681c81312cd9c86bc894b5fe 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1460,7 +1460,7 @@ static void show_rate_histogram(struct rate_hist          *hist,
 static int compare_img(vpx_image_t *img1, vpx_image_t *img2)
 {
     int match = 1;
-    int i, j;
+    int i;
 
     match &= (img1->fmt == img2->fmt);
     match &= (img1->w == img2->w);