diff --git a/vp8/dixie/bool_decoder.c b/vp8/dixie/bool_decoder.c
index 98002b0c0324b3f5cbf1d44dc9da371fadd9fc83..bda52d2a2d8867fa3735aa055b18015188deef88 100644
--- a/vp8/dixie/bool_decoder.c
+++ b/vp8/dixie/bool_decoder.c
@@ -29,7 +29,7 @@ int vp8dx_bool_init(struct bool_decoder *br, const unsigned char *source,
     br->user_buffer_end = source + source_sz;
     br->user_buffer     = source;
     br->value    = 0;
-    br->count    = 0;
+    br->count    = -8;
     br->range    = 255;
 
     if (source_sz && !source)
@@ -44,31 +44,18 @@ int vp8dx_bool_init(struct bool_decoder *br, const unsigned char *source,
 
 void vp8dx_bool_fill(struct bool_decoder *br)
 {
-    const unsigned char *ptr;
-    const unsigned char *end;
+    const unsigned char *bufptr;  /* next unread input byte */
+    const unsigned char *bufend;  /* one past the last input byte */
     vp8_bool_value_t     value;
     int                  count;
-    end = br->user_buffer_end;
-    ptr = br->user_buffer;
+    bufend = br->user_buffer_end;
+    bufptr = br->user_buffer;  /* the refill works on local copies of br */
     value = br->value;
     count = br->count;
 
-    for (;;)
-    {
-        if (ptr >= end)
-        {
-            count = VP8_LOTS_OF_BITS;
-            break;
-        }
+    VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);  /* may set count to VP8_LOTS_OF_BITS */
 
-        if (count > VP8_BD_VALUE_SIZE - 8)
-            break;
-
-        count += 8;
-        value |= (vp8_bool_value_t) * ptr++ << (VP8_BD_VALUE_SIZE - count);
-    }
-
-    br->user_buffer = ptr;
+    br->user_buffer = bufptr;  /* store the advanced read position back */
     br->value = value;
     br->count = count;
 }
diff --git a/vp8/dixie/bool_decoder.h b/vp8/dixie/bool_decoder.h
index f6801a11c9e8e0e9f78fcda0cc0251fa61be30f1..bda472f278d59f1fb943bc26408269fe01fbccdc 100644
--- a/vp8/dixie/bool_decoder.h
+++ b/vp8/dixie/bool_decoder.h
@@ -45,6 +45,30 @@ int  vp8dx_bool_init(struct bool_decoder *br, const unsigned char *source,
 void vp8dx_bool_fill(struct bool_decoder *br);
 
 
+/*Refill loop: ORs input bytes from _bufptr into _value, high bits first, and
+   adds 8 to _count per byte; once _bufptr reaches _bufend, _count is set to
+   VP8_LOTS_OF_BITS. Kept in a macro so every refill site stays consistent.
+  An inline function would be cleaner but has a significant penalty: several
+   decoder fields are modified, and the compiler does not eliminate the stores
+   to them and the following reloads when inlining the function.*/
+#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
+    do \
+    { \
+        int shift; /* NB: hides any caller variable named shift */ \
+        for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
+        { \
+            if((_bufptr) >= (_bufend)) { /* no input bytes left */ \
+                (_count) = VP8_LOTS_OF_BITS; \
+                break; \
+            } \
+            (_count) += 8; \
+            (_value) |= (vp8_bool_value_t)*(_bufptr)++ << shift; \
+            shift -= 8; /* the next byte lands 8 bits lower */ \
+        } \
+    } \
+    while(0)
+
+
 static int bool_get(struct bool_decoder *br, int probability)
 {
     unsigned int bit = 0;
@@ -81,7 +105,7 @@ static int bool_get(struct bool_decoder *br, int probability)
     br->count = count;
     br->range = range;
 
-    if (count < 8)
+    if (count < 0)
         vp8dx_bool_fill(br);
 
     return bit;