diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 1582e380d4012c66588f79ba9f1066b42d0487df..f3bbc17ce9fc40747d46632d8b24aedc7fc344b0 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -784,94 +784,170 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
   vp9_coeff_probs_model *old_frame_coef_probs =
       cpi->common.fc.coef_probs[tx_size];
   vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size];
-  int i, j, k, l, t;
-  int update[2] = {0, 0};
-  int savings;
-
+  const vp9_prob upd = VP9_COEF_UPDATE_PROB;
   const int entropy_nodes_update = UNCONSTRAINED_NODES;
+  int i, j, k, l, t;
+  switch (cpi->sf.use_fast_coef_updates) {
+    case 0: {
+      /* dry run to see if there is any update at all needed */
+      int savings = 0;
+      int update[2] = {0, 0};
+      for (i = 0; i < BLOCK_TYPES; ++i) {
+        for (j = 0; j < REF_TYPES; ++j) {
+          for (k = 0; k < COEF_BANDS; ++k) {
+            for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+              for (t = 0; t < entropy_nodes_update; ++t) {
+                vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+                const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
+                int s;
+                int u = 0;
+
+                if (l >= 3 && k == 0)
+                  continue;
+                if (t == PIVOT_NODE)
+                  s = vp9_prob_diff_update_savings_search_model(
+                      frame_branch_ct[i][j][k][l][0],
+                      old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+                else
+                  s = vp9_prob_diff_update_savings_search(
+                      frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
+                if (s > 0 && newp != oldp)
+                  u = 1;
+                if (u)
+                  savings += s - (int)(vp9_cost_zero(upd));
+                else
+                  savings -= (int)(vp9_cost_zero(upd));
+                update[u]++;
+              }
+            }
+          }
+        }
+      }
 
-  const int tstart = 0;
-  /* dry run to see if there is any udpate at all needed */
-  savings = 0;
-  for (i = 0; i < BLOCK_TYPES; ++i) {
-    for (j = 0; j < REF_TYPES; ++j) {
-      for (k = 0; k < COEF_BANDS; ++k) {
-        // int prev_coef_savings[ENTROPY_NODES] = {0};
-        for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
-          for (t = tstart; t < entropy_nodes_update; ++t) {
-            vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
-            const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
-            const vp9_prob upd = VP9_COEF_UPDATE_PROB;
-            int s;
-            int u = 0;
-
-            if (l >= 3 && k == 0)
-              continue;
-            if (t == PIVOT_NODE)
-              s = vp9_prob_diff_update_savings_search_model(
-                  frame_branch_ct[i][j][k][l][0],
-                  old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
-            else
-              s = vp9_prob_diff_update_savings_search(
-                  frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
-            if (s > 0 && newp != oldp)
-              u = 1;
-            if (u)
-              savings += s - (int)(vp9_cost_zero(upd));
-            else
-              savings -= (int)(vp9_cost_zero(upd));
-            update[u]++;
+      // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
+      /* Is coef updated at all */
+      if (update[1] == 0 || savings < 0) {
+        vp9_write_bit(bc, 0);
+        return;
+      }
+      vp9_write_bit(bc, 1);
+      for (i = 0; i < BLOCK_TYPES; ++i) {
+        for (j = 0; j < REF_TYPES; ++j) {
+          for (k = 0; k < COEF_BANDS; ++k) {
+            for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+              // calc probs and branch cts for this frame only
+              for (t = 0; t < entropy_nodes_update; ++t) {
+                vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+                vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+                const vp9_prob upd = VP9_COEF_UPDATE_PROB;
+                int s;
+                int u = 0;
+                if (l >= 3 && k == 0)
+                  continue;
+                if (t == PIVOT_NODE)
+                  s = vp9_prob_diff_update_savings_search_model(
+                      frame_branch_ct[i][j][k][l][0],
+                      old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+                else
+                  s = vp9_prob_diff_update_savings_search(
+                      frame_branch_ct[i][j][k][l][t],
+                      *oldp, &newp, upd);
+                if (s > 0 && newp != *oldp)
+                  u = 1;
+                vp9_write(bc, u, upd);
+#ifdef ENTROPY_STATS
+                if (!cpi->dummy_packing)
+                  ++tree_update_hist[tx_size][i][j][k][l][t][u];
+#endif
+                if (u) {
+                  /* send/use new probability */
+                  vp9_write_prob_diff_update(bc, newp, *oldp);
+                  *oldp = newp;
+                }
+              }
+            }
           }
         }
       }
+      return;
     }
-  }
 
-  // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
-  /* Is coef updated at all */
-  if (update[1] == 0 || savings < 0) {
-    vp9_write_bit(bc, 0);
-    return;
-  }
-  vp9_write_bit(bc, 1);
-  for (i = 0; i < BLOCK_TYPES; ++i) {
-    for (j = 0; j < REF_TYPES; ++j) {
-      for (k = 0; k < COEF_BANDS; ++k) {
-        // int prev_coef_savings[ENTROPY_NODES] = {0};
-        for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
-          // calc probs and branch cts for this frame only
-          for (t = tstart; t < entropy_nodes_update; ++t) {
-            vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
-            vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
-            const vp9_prob upd = VP9_COEF_UPDATE_PROB;
-            int s;
-            int u = 0;
-            if (l >= 3 && k == 0)
-              continue;
-            if (t == PIVOT_NODE)
-              s = vp9_prob_diff_update_savings_search_model(
-                  frame_branch_ct[i][j][k][l][0],
-                  old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
-            else
-              s = vp9_prob_diff_update_savings_search(
-                  frame_branch_ct[i][j][k][l][t],
-                  *oldp, &newp, upd);
-            if (s > 0 && newp != *oldp)
-              u = 1;
-            vp9_write(bc, u, upd);
+    case 1:
+    case 2: {
+      const int prev_coef_contexts_to_update =
+          (cpi->sf.use_fast_coef_updates == 2 ?
+           PREV_COEF_CONTEXTS >> 1 : PREV_COEF_CONTEXTS);
+      const int coef_band_to_update =
+          (cpi->sf.use_fast_coef_updates == 2 ?
+           COEF_BANDS >> 1 : COEF_BANDS);
+      int updates = 0;
+      int noupdates_before_first = 0;
+      for (i = 0; i < BLOCK_TYPES; ++i) {
+        for (j = 0; j < REF_TYPES; ++j) {
+          for (k = 0; k < COEF_BANDS; ++k) {
+            for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+              // calc probs and branch cts for this frame only
+              for (t = 0; t < entropy_nodes_update; ++t) {
+                vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+                vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+                int s;
+                int u = 0;
+                if (l >= 3 && k == 0)
+                  continue;
+                if (l >= prev_coef_contexts_to_update ||
+                    k >= coef_band_to_update) {
+                  u = 0;
+                } else {
+                  if (t == PIVOT_NODE)
+                    s = vp9_prob_diff_update_savings_search_model(
+                        frame_branch_ct[i][j][k][l][0],
+                        old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+                  else
+                    s = vp9_prob_diff_update_savings_search(
+                        frame_branch_ct[i][j][k][l][t],
+                        *oldp, &newp, upd);
+                  if (s > 0 && newp != *oldp)
+                    u = 1;
+                }
+                updates += u;
+                if (u == 0 && updates == 0) {
+                  noupdates_before_first++;
 #ifdef ENTROPY_STATS
-            if (!cpi->dummy_packing)
-              ++tree_update_hist[tx_size][i][j][k][l][t][u];
+                  if (!cpi->dummy_packing)
+                    ++tree_update_hist[tx_size][i][j][k][l][t][u];
 #endif
-            if (u) {
-              /* send/use new probability */
-              vp9_write_prob_diff_update(bc, newp, *oldp);
-              *oldp = newp;
+                  continue;
+                }
+                if (u == 1 && updates == 1) {
+                  int v;
+                  // first update
+                  vp9_write_bit(bc, 1);
+                  for (v = 0; v < noupdates_before_first; ++v)
+                    vp9_write(bc, 0, upd);
+                }
+                vp9_write(bc, u, upd);
+#ifdef ENTROPY_STATS
+                if (!cpi->dummy_packing)
+                  ++tree_update_hist[tx_size][i][j][k][l][t][u];
+#endif
+                if (u) {
+                  /* send/use new probability */
+                  vp9_write_prob_diff_update(bc, newp, *oldp);
+                  *oldp = newp;
+                }
+              }
             }
           }
         }
       }
+      if (updates == 0) {
+        vp9_write_bit(bc, 0);  // no updates
+      }
+      return;
     }
+
+    default:
+      assert(0);
   }
 }
 
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 34bd43ef15c2e78bfc858ed47d3a31608f90eeea..22ad682aead61474951f787e4c8f5517beb19df7 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -741,6 +741,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->skip_encode_sb = 0;
   sf->use_uv_intra_rd_estimate = 0;
   sf->use_fast_lpf_pick = 0;
+  sf->use_fast_coef_updates = 0;
   sf->using_small_partition_info = 0;
   // Skip any mode not chosen at size < X for all sizes > X
   // Hence BLOCK_64X64 (skip is off)
@@ -802,6 +803,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
         sf->intra_y_mode_mask = INTRA_DC_TM_H_V;
         sf->intra_uv_mode_mask = INTRA_DC_TM_H_V;
+        sf->use_fast_coef_updates = 1;
       }
       if (speed == 2) {
         sf->adjust_thresholds_by_speed = 1;
@@ -840,6 +842,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->auto_min_max_partition_interval = 2;
         sf->disable_split_var_thresh = 32;
         sf->disable_filter_search_var_thresh = 32;
+        sf->use_fast_coef_updates = 2;
       }
       if (speed == 3) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -866,6 +869,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->disable_filter_search_var_thresh = 64;
         sf->intra_y_mode_mask = INTRA_DC_ONLY;
         sf->intra_uv_mode_mask = INTRA_DC_ONLY;
+        sf->use_fast_coef_updates = 2;
       }
       if (speed == 4) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -894,6 +898,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->subpel_iters_per_step = 1;
         sf->disable_split_var_thresh = 64;
         sf->disable_filter_search_var_thresh = 96;
+        sf->use_fast_coef_updates = 2;
       }
       /*
       if (speed == 2) {
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 940763d7648f99cc57a2f123c98df81c417b2732..0933185b99bca6003d817334e5a67879b41bd817 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -298,6 +298,7 @@ typedef struct {
   int use_rd_breakout;
   int use_uv_intra_rd_estimate;
   int use_fast_lpf_pick;
+  int use_fast_coef_updates;  // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
 } SPEED_FEATURES;
 
 typedef struct VP9_COMP {