diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 7dbc252271ba2a1e9bf1e19295dccdab55e5bc7a..bac40c52754d33c146e343dec7c312d74f95f05d 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -227,6 +227,10 @@ typedef struct macroblockd {
 
   int q_index;
 
+  /* Entropy contexts, one entry per plane: Y, U, V, (A). */
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
+
   PARTITION_CONTEXT *above_seg_context;
   PARTITION_CONTEXT left_seg_context[8];
 } MACROBLOCKD;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 382f992066830baf2bbb716b26ed69992894d7b0..77d7f857c1f04c04ccd6fd7ebd5cf9f1ac994445 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -204,7 +204,8 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
 // tile.
 static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) {
   VP9_COMMON *const cm = &pbi->common;
-  int tile_col;
+  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+  int i, tile_col;
 
   CHECK_MEM_ERROR(cm, pbi->mi_streams,
                   vpx_realloc(pbi->mi_streams, tile_cols *
@@ -215,12 +216,24 @@ static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) {
         &cm->mi[cm->mi_rows * cm->cur_tile_mi_col_start];
   }
 
+  // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
+  // block where mi unit size is 8x8.
+  CHECK_MEM_ERROR(cm, pbi->above_context[0],
+                  vpx_realloc(pbi->above_context[0],
+                              sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
+                              2 * aligned_mi_cols));
+  for (i = 1; i < MAX_MB_PLANE; ++i) {
+    // Element offset, not bytes: pointer arithmetic scales by sizeof itself.
+    pbi->above_context[i] = pbi->above_context[0] +
+                            i * 2 * aligned_mi_cols;
+  }
+
   // This is sized based on the entire frame. Each tile operates within its
   // column bounds.
   CHECK_MEM_ERROR(cm, pbi->above_seg_context,
                   vpx_realloc(pbi->above_seg_context,
                               sizeof(*pbi->above_seg_context) *
-                              mi_cols_aligned_to_sb(cm->mi_cols)));
+                              aligned_mi_cols));
 }
 
 static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -335,7 +348,7 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize,
   // cannot be used.
   xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
 
-  set_skip_context(xd, cm->above_context, cm->left_context, mi_row, mi_col);
+  set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
   // as they are always compared to values that are in 1/8th pel units
@@ -719,7 +732,12 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
 
 static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd,
                                int tile_col) {
+  int i;
   xd->mi_stream = pbi->mi_streams[tile_col];
+
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    xd->above_context[i] = pbi->above_context[i];
+  }
   // see note in alloc_tile_storage().
   xd->above_seg_context = pbi->above_seg_context;
 }
@@ -744,7 +762,7 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
   for (mi_row = cm->cur_tile_mi_row_start; mi_row < cm->cur_tile_mi_row_end;
        mi_row += MI_BLOCK_SIZE) {
     // For a SB there are 2 left contexts, each pertaining to a MB row within
-    vp9_zero(cm->left_context);
+    vp9_zero(xd->left_context);
     vp9_zero(xd->left_seg_context);
     for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
          mi_col += MI_BLOCK_SIZE)
@@ -811,8 +829,9 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(cm->above_context[0], 0,
-             sizeof(ENTROPY_CONTEXT) * MAX_MB_PLANE * (2 * aligned_mi_cols));
+  vpx_memset(pbi->above_context[0], 0,
+             sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
+             2 * aligned_mi_cols);
 
   vpx_memset(pbi->above_seg_context, 0,
              sizeof(*pbi->above_seg_context) * aligned_mi_cols);
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 95487355595d6eb7a83506fdcfc150cfd8c64554..5a1056d4605e1ad78b3c9368cb4387142d0214ea 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -163,6 +163,7 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
   vp9_worker_end(&pbi->lf_worker);
   vpx_free(pbi->lf_worker.data1);
   vpx_free(pbi->mi_streams);
+  vpx_free(pbi->above_context[0]);
   vpx_free(pbi->above_seg_context);
   vpx_free(pbi);
 }
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 3f00cd39f412f9c325c65df8013b2688bfaf2729..77399529a2587e637b0fcd6cccd7bc68aad1c568 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -44,6 +44,7 @@ typedef struct VP9Decompressor {
      tile column index. */
   MODE_INFO **mi_streams;
 
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
   PARTITION_CONTEXT *above_seg_context;
 } VP9D_COMP;