diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index b64d98c37c8e6955aa2d4d97503d7e0fda276fd2..c65d209bf2ee5f6545b389eb526b35954fa62853 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -241,7 +241,8 @@ static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) {
 }
 
 static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
-                                    TX_SIZE tx_size, uint8_t *dst, int stride) {
+                                    TX_SIZE tx_size, uint8_t *dst, int stride,
+                                    uint8_t *token_cache) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const int eob = pd->eobs[block];
   if (eob > 0) {
@@ -274,13 +275,20 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
 
     if (eob == 1) {
       vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0]));
+      vpx_memset(token_cache, 0, 2 * sizeof(token_cache[0]));
     } else {
-      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
+      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) {
         vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
-      else if (tx_size == TX_32X32 && eob <= 34)
+        vpx_memset(token_cache, 0,
+                   4 * (4 << tx_size) * sizeof(token_cache[0]));
+      } else if (tx_size == TX_32X32 && eob <= 34) {
         vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
-      else
+        vpx_memset(token_cache, 0, 256 * sizeof(token_cache[0]));
+      } else {
         vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
+        vpx_memset(token_cache, 0,
+                   (16 << (tx_size << 1)) * sizeof(token_cache[0]));
+      }
     }
   }
 }
@@ -319,7 +327,8 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
   if (!mi->mbmi.skip_coeff) {
     vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, tx_size,
                             args->r, args->token_cache);
-    inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride);
+    inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
+                            args->token_cache);
   }
 }
 
@@ -345,7 +354,7 @@ static void reconstruct_inter_block(int plane, int block,
                                              args->r, args->token_cache);
   inverse_transform_block(xd, plane, block, tx_size,
                           &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
-                          pd->dst.stride);
+                          pd->dst.stride, args->token_cache);
 }
 
 static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -946,6 +955,7 @@ static void setup_tile_macroblockd(TileWorkerData *const tile_data) {
     pd[i].dqcoeff = tile_data->dqcoeff[i];
     pd[i].eobs    = tile_data->eobs[i];
     vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
+    vpx_memset(tile_data->token_cache, 0, sizeof(tile_data->token_cache));
   }
 }
 
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 75e7e40ae47d220c1901865f33896009102de1a6..f6219c52776ddcbb93a4023d8d447a280b645b84 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -81,6 +81,7 @@ static const int token_to_counttoken[MAX_ENTROPY_TOKENS] = {
     INCREMENT_COUNT(token);                              \
     token_cache[scan[c]] = vp9_pt_energy_class[token];   \
     ++c;                                                 \
+    pt = get_coef_context(nb, token_cache, c);           \
     dqv = dq[1];                                          \
     continue;                                            \
   }
@@ -118,31 +119,24 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
 
   while (c < seg_eob) {
     int val;
-    if (c)
-      pt = get_coef_context(nb, token_cache, c);
     band = *band_translate++;
     prob = coef_probs[band][pt];
     if (!cm->frame_parallel_decoding_mode)
       ++eob_branch_count[band][pt];
     if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
       break;
-    goto DECODE_ZERO;
-
-  SKIP_START:
-    if (c >= seg_eob)
-      break;
-    if (c)
-      pt = get_coef_context(nb, token_cache, c);
-    band = *band_translate++;
-    prob = coef_probs[band][pt];
 
   DECODE_ZERO:
     if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
       INCREMENT_COUNT(ZERO_TOKEN);
-      token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN];
-      dqv = dq[1];                                          \
+      dqv = dq[1];
       ++c;
-      goto SKIP_START;
+      if (c >= seg_eob)
+        break;
+      pt = get_coef_context(nb, token_cache, c);
+      band = *band_translate++;
+      prob = coef_probs[band][pt];
+      goto DECODE_ZERO;
     }
 
     // ONE_CONTEXT_NODE_0_