diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 869f6e20f2dd0106b61cd3a119a237e3ad162c73..fb9aa2e2504c3f708b0f54a2508a7ca21fc963a2 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -188,7 +188,7 @@ void vp8_setup_version(VP8_COMMON *cm)
 void vp8_create_common(VP8_COMMON *oci)
 {
     vp8_machine_specific_config(oci);
-    vp8_default_coef_probs(oci);
+
     vp8_init_mbmode_probs(oci);
     vp8_default_bmode_probs(oci->fc.bmode_prob);
 
diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h
new file mode 100755
index 0000000000000000000000000000000000000000..0d195636bcb70b8b2f372ffa91cf5c35b7b9ecca
--- /dev/null
+++ b/vp8/common/default_coef_probs.h
@@ -0,0 +1,188 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+*/
+
+
+/* Generated file, included by entropy.c (consumed by vp8_default_coef_probs()). */
+
+
+static const vp8_prob default_coef_probs [BLOCK_TYPES]          /* Default coefficient probabilities, indexed   */
+                                         [COEF_BANDS]           /* [block type][coeff band][context][node].     */
+                                         [PREV_COEF_CONTEXTS]   /* vp8_default_coef_probs() in entropy.c        */
+                                         [ENTROPY_NODES] =      /* memcpy's this table into pc->fc.coef_probs.  */
+{
+    { /* Block Type ( 0 ) */
+        { /* Coeff Band ( 0 ): all 128 -- presumably an unused placeholder; TODO confirm */
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+            { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+            { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1,  98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+            { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+            {  78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+            { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+            {  77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+            { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+            {  37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+            { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+            { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+            { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+            {  80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 )*/
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+        }
+    },
+    { /* Block Type ( 1 ) */
+        { /* Coeff Band ( 0 )*/
+            { 198,  35, 237, 223, 193, 187, 162, 160, 145, 155,  62 },
+            { 131,  45, 198, 221, 172, 176, 220, 157, 252, 221,   1 },
+            {  68,  47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            {   1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+            { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+            {  81,  99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+            {  99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+            {  23,  91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+            { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+            {  44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+            {  94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+            {  22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+            { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+            {  35,  77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+            { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+            {  45,  99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 )*/
+            {   1,   1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 203,   1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 137,   1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
+        }
+    },
+    { /* Block Type ( 2 ) */
+        { /* Coeff Band ( 0 )*/
+            { 253,   9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+            { 175,  13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+            {  73,  17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            {   1,  95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+            { 239,  90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+            { 155,  77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1,  24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+            { 201,  51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+            {  69,  46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1,  16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 190,  36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+            { 149,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 213,  62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+            {  55,  93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 ): all 128 -- presumably an unused placeholder; TODO confirm */
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+        }
+    },
+    { /* Block Type ( 3 ) */
+        { /* Coeff Band ( 0 )*/
+            { 202,  24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+            { 126,  38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+            {  61,  46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            {   1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+            { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+            {  39,  77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1,  52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+            { 124,  74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+            {  24,  71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+            { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+            {  28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1,  81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+            { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+            {  20,  95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+            { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+            {  47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+            { 141,  84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+            {  42,  80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 )*/
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 244,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 238,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        }
+    }
+};
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
deleted file mode 100644
index 293e74269f4479a0b1c695b783c306a07e4c14a6..0000000000000000000000000000000000000000
--- a/vp8/common/defaultcoefcounts.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef __DEFAULTCOEFCOUNTS_H
-#define __DEFAULTCOEFCOUNTS_H
-
-#include "entropy.h"
-
-extern const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
-                                                 [COEF_BANDS]
-                                                 [PREV_COEF_CONTEXTS]
-                                                 [MAX_ENTROPY_TOKENS];
-
-extern const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES]
-                                                    [COEF_BANDS]
-                                                    [PREV_COEF_CONTEXTS]
-                                                    [MAX_ENTROPY_TOKENS];
-
-#endif
\ No newline at end of file
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index ca37aab22abd44b54245dbc4795c3f7333601eb0..34b54b9a77139ec92589b8b281cea2e0c0f2fe3d 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -15,6 +15,7 @@
 #include "string.h"
 #include "blockd.h"
 #include "onyxc_int.h"
+#include "vpx_mem/vpx_mem.h"
 
 #define uchar unsigned char     /* typedefs can clash */
 #define uint  unsigned int
@@ -193,36 +194,13 @@ vp8_extra_bit_struct vp8_extra_bits[12] =
 #endif
     { 0, 0, 0, 0}
 };
-#include "defaultcoefcounts.h"
+
+#include "default_coef_probs.h"
 
 void vp8_default_coef_probs(VP8_COMMON *pc)
 {
-    int h = 0;
-
-    do
-    {
-        int i = 0;
-
-        do
-        {
-            int k = 0;
-
-            do
-            {
-                unsigned int branch_ct [ENTROPY_NODES] [2];
-                vp8_tree_probs_from_distribution(
-                    MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
-                    pc->fc.coef_probs[h][i][k],
-                    branch_ct,
-                    vp8_default_coef_counts[h][i][k],
-                    256, 1);
-
-            }
-            while (++k < PREV_COEF_CONTEXTS);
-        }
-        while (++i < COEF_BANDS);
-    }
-    while (++h < BLOCK_TYPES);
+    vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
+                   sizeof(default_coef_probs));
 #if CONFIG_T8X8
     h = 0;
     do
@@ -250,7 +228,6 @@ void vp8_default_coef_probs(VP8_COMMON *pc)
 #endif
 }
 
-
 void vp8_coef_tree_initialize()
 {
     init_bit_trees();
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index 34a7e18aea727ec7175f57de50d3832d4a13df2a..83d3765ff8e1ec51de76f7c124c59488530515d7 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -11,7 +11,7 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-;void idct_dequant_0_2x_sse2
+;void vp8_idct_dequant_0_2x_sse2
 ; (
 ;   short *qcoeff       - 0
 ;   short *dequant      - 1
@@ -21,8 +21,8 @@
 ;   int blk_stride      - 5
 ; )
 
-global sym(idct_dequant_0_2x_sse2)
-sym(idct_dequant_0_2x_sse2):
+global sym(vp8_idct_dequant_0_2x_sse2)
+sym(vp8_idct_dequant_0_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -97,8 +97,8 @@ sym(idct_dequant_0_2x_sse2):
     pop         rbp
     ret
 
-global sym(idct_dequant_full_2x_sse2)
-sym(idct_dequant_full_2x_sse2):
+global sym(vp8_idct_dequant_full_2x_sse2)
+sym(vp8_idct_dequant_full_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -353,7 +353,7 @@ sym(idct_dequant_full_2x_sse2):
     pop         rbp
     ret
 
-;void idct_dequant_dc_0_2x_sse2
+;void vp8_idct_dequant_dc_0_2x_sse2
 ; (
 ;   short *qcoeff       - 0
 ;   short *dequant      - 1
@@ -362,8 +362,8 @@ sym(idct_dequant_full_2x_sse2):
 ;   int dst_stride      - 4
 ;   short *dc           - 5
 ; )
-global sym(idct_dequant_dc_0_2x_sse2)
-sym(idct_dequant_dc_0_2x_sse2):
+global sym(vp8_idct_dequant_dc_0_2x_sse2)
+sym(vp8_idct_dequant_dc_0_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -438,8 +438,8 @@ sym(idct_dequant_dc_0_2x_sse2):
     pop         rbp
     ret
 
-global sym(idct_dequant_dc_full_2x_sse2)
-sym(idct_dequant_dc_full_2x_sse2):
+global sym(vp8_idct_dequant_dc_full_2x_sse2)
+sym(vp8_idct_dequant_dc_full_2x_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 4efff7eb584d227a16b9dabe66094ffbf0814b68..295609c58167a1cb6018b0f3366bd6f467217ee0 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1395,8 +1395,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
         neg         rax
 
         ; calculate mask
-        movdqu      xmm1, [rsi+2*rax]       ; p1
-        movdqu      xmm0, [rdi]             ; q1
+        movdqa      xmm1, [rsi+2*rax]       ; p1
+        movdqa      xmm0, [rdi]             ; q1
         movdqa      xmm2, xmm1
         movdqa      xmm7, xmm0
         movdqa      xmm4, xmm0
@@ -1406,8 +1406,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
         pand        xmm1, [GLOBAL(tfe)]     ; set lsb of each byte to zero
         psrlw       xmm1, 1                 ; abs(p1-q1)/2
 
-        movdqu      xmm5, [rsi+rax]         ; p0
-        movdqu      xmm4, [rsi]             ; q0
+        movdqa      xmm5, [rsi+rax]         ; p0
+        movdqa      xmm4, [rsi]             ; q0
         movdqa      xmm0, xmm4              ; q0
         movdqa      xmm6, xmm5              ; p0
         psubusb     xmm5, xmm4              ; p0-=q0
@@ -1449,7 +1449,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
 
         psubsb      xmm3, xmm0              ; q0-= q0 add
         pxor        xmm3, [GLOBAL(t80)]     ; unoffset
-        movdqu      [rsi], xmm3             ; write back
+        movdqa      [rsi], xmm3             ; write back
 
         ; now do +3 side
         psubsb      xmm5, [GLOBAL(t1s)]     ; +3 instead of +4
@@ -1465,7 +1465,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
 
         paddsb      xmm6, xmm0              ; p0+= p0 add
         pxor        xmm6, [GLOBAL(t80)]     ; unoffset
-        movdqu      [rsi+rax], xmm6         ; write back
+        movdqa      [rsi+rax], xmm6         ; write back
 
     ; begin epilog
     pop rdi
@@ -1507,17 +1507,17 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
         lea         rdx,        [rsi + rax*4]
         lea         rcx,        [rdx + rax]
 
-        movdqu      xmm0,       [rsi]                   ; (high 96 bits unused) 03 02 01 00
-        movdqu      xmm1,       [rdx]                   ; (high 96 bits unused) 43 42 41 40
-        movdqu      xmm2,       [rdi]                   ; 13 12 11 10
-        movdqu      xmm3,       [rcx]                   ; 53 52 51 50
+        movd        xmm0,       [rsi]                   ; (high 96 bits unused) 03 02 01 00
+        movd        xmm1,       [rdx]                   ; (high 96 bits unused) 43 42 41 40
+        movd        xmm2,       [rdi]                   ; 13 12 11 10
+        movd        xmm3,       [rcx]                   ; 53 52 51 50
         punpckldq   xmm0,       xmm1                    ; (high 64 bits unused) 43 42 41 40 03 02 01 00
         punpckldq   xmm2,       xmm3                    ; 53 52 51 50 13 12 11 10
 
-        movdqu      xmm4,       [rsi + rax*2]           ; 23 22 21 20
-        movdqu      xmm5,       [rdx + rax*2]           ; 63 62 61 60
-        movdqu      xmm6,       [rdi + rax*2]           ; 33 32 31 30
-        movdqu      xmm7,       [rcx + rax*2]           ; 73 72 71 70
+        movd        xmm4,       [rsi + rax*2]           ; 23 22 21 20
+        movd        xmm5,       [rdx + rax*2]           ; 63 62 61 60
+        movd        xmm6,       [rdi + rax*2]           ; 33 32 31 30
+        movd        xmm7,       [rcx + rax*2]           ; 73 72 71 70
         punpckldq   xmm4,       xmm5                    ; 63 62 61 60 23 22 21 20
         punpckldq   xmm6,       xmm7                    ; 73 72 71 70 33 32 31 30
 
@@ -1540,17 +1540,17 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
         lea         rdx,        [rsi + rax*4]
         lea         rcx,        [rdx + rax]
 
-        movdqu      xmm4,       [rsi]                   ; 83 82 81 80
-        movdqu      xmm1,       [rdx]                   ; c3 c2 c1 c0
-        movdqu      xmm6,       [rdi]                   ; 93 92 91 90
-        movdqu      xmm3,       [rcx]                   ; d3 d2 d1 d0
+        movd        xmm4,       [rsi]                   ; 83 82 81 80
+        movd        xmm1,       [rdx]                   ; c3 c2 c1 c0
+        movd        xmm6,       [rdi]                   ; 93 92 91 90
+        movd        xmm3,       [rcx]                   ; d3 d2 d1 d0
         punpckldq   xmm4,       xmm1                    ; c3 c2 c1 c0 83 82 81 80
         punpckldq   xmm6,       xmm3                    ; d3 d2 d1 d0 93 92 91 90
 
-        movdqu      xmm0,       [rsi + rax*2]           ; a3 a2 a1 a0
-        movdqu      xmm5,       [rdx + rax*2]           ; e3 e2 e1 e0
-        movdqu      xmm2,       [rdi + rax*2]           ; b3 b2 b1 b0
-        movdqu      xmm7,       [rcx + rax*2]           ; f3 f2 f1 f0
+        movd        xmm0,       [rsi + rax*2]           ; a3 a2 a1 a0
+        movd        xmm5,       [rdx + rax*2]           ; e3 e2 e1 e0
+        movd        xmm2,       [rdi + rax*2]           ; b3 b2 b1 b0
+        movd        xmm7,       [rcx + rax*2]           ; f3 f2 f1 f0
         punpckldq   xmm0,       xmm5                    ; e3 e2 e1 e0 a3 a2 a1 a0
         punpckldq   xmm2,       xmm7                    ; f3 f2 f1 f0 b3 b2 b1 b0
 
diff --git a/vp8/common/x86/postproc_mmx.c b/vp8/common/x86/postproc_mmx.c
deleted file mode 100644
index 6b6321ace3d672617105085cfb36956262e7135b..0000000000000000000000000000000000000000
--- a/vp8/common/x86/postproc_mmx.c
+++ /dev/null
@@ -1,1508 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <math.h>
-#include <stdlib.h>
-#include "vpx_scale/yv12config.h"
-#include "pragmas.h"
-
-#define VP8_FILTER_WEIGHT 128
-#define VP8_FILTER_SHIFT  7
-
-
-
-/* static constants */
-__declspec(align(16))
-const static short  Blur[48] =
-{
-
-    16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16,
-    64, 64, 64, 64, 64, 64, 64, 64,
-    16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16,
-    0,  0,  0,  0,  0,  0,  0,  0,
-
-};
-#define RD  __declspec(align(16)) __int64 rd  = 0x0040004000400040;
-#define R4D2 __declspec(align(16)) __int64 rd42[2] = {0x0004000400040004,0x0004000400040004};
-
-#ifndef RELOCATEABLE
-const static RD;
-const static R4D2;
-#endif
-
-
-/* external references */
-extern double vp8_gaussian(double sigma, double mu, double x);
-extern short vp8_rv[];
-extern int vp8_q2mbl(int x) ;
-
-
-
-void vp8_post_proc_down_and_across_mmx
-(
-    unsigned char *src_ptr,
-    unsigned char *dst_ptr,
-    int src_pixels_per_line,
-    int dst_pixels_per_line,
-    int rows,
-    int cols,
-    int flimit
-)
-{
-#ifdef RELOCATEABLE
-    RD
-    R4D2
-#endif
-
-    __asm
-    {
-        push        ebx
-        lea         ebx, Blur
-        movd        mm2, flimit
-        punpcklwd   mm2, mm2
-        punpckldq   mm2, mm2
-
-        mov         esi,        src_ptr
-        mov         edi,        dst_ptr
-
-        mov         ecx, DWORD PTR rows
-        mov         eax, src_pixels_per_line ;
-        destination pitch?
-        pxor        mm0, mm0              ;
-        mm0 = 00000000
-
-        nextrow:
-
-        xor         edx,        edx       ;
-
-        clear out edx for use as loop counter
-        nextcol:
-
-        pxor        mm7, mm7              ;
-
-    mm7 = 00000000
-    movq        mm6, [ebx + 32 ]      ;
-        mm6 = kernel 2 taps
-        movq        mm3, [esi]            ;
-        mm4 = r0 p0..p7
-        punpcklbw   mm3, mm0              ;
-        mm3 = p0..p3
-        movq        mm1, mm3              ;
-        mm1 = p0..p3
-        pmullw      mm3, mm6              ;
-        mm3 *= kernel 2 modifiers
-
-        movq        mm6, [ebx + 48]       ;
-        mm6 = kernel 3 taps
-        movq        mm5, [esi + eax]      ;
-        mm4 = r1 p0..p7
-        punpcklbw   mm5, mm0              ;
-        mm5 = r1 p0..p3
-        pmullw      mm6, mm5              ;
-        mm6 *= p0..p3 * kernel 3 modifiers
-        paddusw     mm3, mm6              ;
-        mm3 += mm6
-
-        ;
-        thresholding
-        movq        mm7, mm1              ;
-        mm7 = r0 p0..p3
-        psubusw     mm7, mm5              ;
-        mm7 = r0 p0..p3 - r1 p0..p3
-        psubusw     mm5, mm1              ;
-        mm5 = r1 p0..p3 - r0 p0..p3
-        paddusw     mm7, mm5              ;
-        mm7 = abs(r0 p0..p3 - r1 p0..p3)
-        pcmpgtw     mm7, mm2
-
-        movq        mm6, [ebx + 64 ]      ;
-        mm6 = kernel 4 modifiers
-        movq        mm5, [esi + 2*eax]    ;
-        mm4 = r2 p0..p7
-        punpcklbw   mm5, mm0              ;
-        mm5 = r2 p0..p3
-        pmullw      mm6, mm5              ;
-        mm5 *= kernel 4 modifiers
-        paddusw     mm3, mm6              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movq        mm6, mm1              ;
-        mm6 = r0 p0..p3
-        psubusw     mm6, mm5              ;
-        mm6 = r0 p0..p3 - r2 p0..p3
-        psubusw     mm5, mm1              ;
-        mm5 = r2 p0..p3 - r2 p0..p3
-        paddusw     mm6, mm5              ;
-        mm6 = abs(r0 p0..p3 - r2 p0..p3)
-        pcmpgtw     mm6, mm2
-        por         mm7, mm6              ;
-        accumulate thresholds
-
-
-        neg         eax
-        movq        mm6, [ebx ]           ;
-        kernel 0 taps
-        movq        mm5, [esi+2*eax]      ;
-        mm4 = r-2 p0..p7
-        punpcklbw   mm5, mm0              ;
-        mm5 = r-2 p0..p3
-        pmullw      mm6, mm5              ;
-        mm5 *= kernel 0 modifiers
-        paddusw     mm3, mm6              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movq        mm6, mm1              ;
-        mm6 = r0 p0..p3
-        psubusw     mm6, mm5              ;
-        mm6 = p0..p3 - r-2 p0..p3
-        psubusw     mm5, mm1              ;
-        mm5 = r-2 p0..p3 - p0..p3
-        paddusw     mm6, mm5              ;
-        mm6 = abs(r0 p0..p3 - r-2 p0..p3)
-        pcmpgtw     mm6, mm2
-        por         mm7, mm6              ;
-        accumulate thresholds
-
-        movq        mm6, [ebx + 16]       ;
-        kernel 1 taps
-        movq        mm4, [esi+eax]        ;
-        mm4 = r-1 p0..p7
-        punpcklbw   mm4, mm0              ;
-        mm4 = r-1 p0..p3
-        pmullw      mm6, mm4              ;
-        mm4 *= kernel 1 modifiers.
-        paddusw     mm3, mm6              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movq        mm6, mm1              ;
-        mm6 = r0 p0..p3
-        psubusw     mm6, mm4              ;
-        mm6 = p0..p3 - r-2 p0..p3
-        psubusw     mm4, mm1              ;
-        mm5 = r-1 p0..p3 - p0..p3
-        paddusw     mm6, mm4              ;
-        mm6 = abs(r0 p0..p3 - r-1 p0..p3)
-        pcmpgtw     mm6, mm2
-        por         mm7, mm6              ;
-        accumulate thresholds
-
-
-        paddusw     mm3, rd               ;
-        mm3 += round value
-        psraw       mm3, VP8_FILTER_SHIFT     ;
-        mm3 /= 128
-
-        pand        mm1, mm7              ;
-        mm1 select vals > thresh from source
-        pandn       mm7, mm3              ;
-        mm7 select vals < thresh from blurred result
-        paddusw     mm1, mm7              ;
-        combination
-
-        packuswb    mm1, mm0              ;
-        pack to bytes
-
-        movd        [edi], mm1            ;
-        neg         eax                   ;
-        pitch is positive
-
-
-        add         esi, 4
-        add         edi, 4
-        add         edx, 4
-
-        cmp         edx, cols
-        jl          nextcol
-        // done with the all cols, start the across filtering in place
-        sub         esi, edx
-        sub         edi, edx
-
-
-        push        eax
-        xor         edx,    edx
-        mov         eax,    [edi-4];
-
-        acrossnextcol:
-        pxor        mm7, mm7              ;
-        mm7 = 00000000
-        movq        mm6, [ebx + 32 ]      ;
-        movq        mm4, [edi+edx]        ;
-        mm4 = p0..p7
-        movq        mm3, mm4              ;
-        mm3 = p0..p7
-        punpcklbw   mm3, mm0              ;
-        mm3 = p0..p3
-        movq        mm1, mm3              ;
-        mm1 = p0..p3
-        pmullw      mm3, mm6              ;
-        mm3 *= kernel 2 modifiers
-
-        movq        mm6, [ebx + 48]
-        psrlq       mm4, 8                ;
-        mm4 = p1..p7
-        movq        mm5, mm4              ;
-        mm5 = p1..p7
-        punpcklbw   mm5, mm0              ;
-        mm5 = p1..p4
-        pmullw      mm6, mm5              ;
-        mm6 *= p1..p4 * kernel 3 modifiers
-        paddusw     mm3, mm6              ;
-        mm3 += mm6
-
-        ;
-        thresholding
-        movq        mm7, mm1              ;
-        mm7 = p0..p3
-        psubusw     mm7, mm5              ;
-        mm7 = p0..p3 - p1..p4
-        psubusw     mm5, mm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     mm7, mm5              ;
-        mm7 = abs(p0..p3 - p1..p4)
-        pcmpgtw     mm7, mm2
-
-        movq        mm6, [ebx + 64 ]
-        psrlq       mm4, 8                ;
-        mm4 = p2..p7
-        movq        mm5, mm4              ;
-        mm5 = p2..p7
-        punpcklbw   mm5, mm0              ;
-        mm5 = p2..p5
-        pmullw      mm6, mm5              ;
-        mm5 *= kernel 4 modifiers
-        paddusw     mm3, mm6              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movq        mm6, mm1              ;
-        mm6 = p0..p3
-        psubusw     mm6, mm5              ;
-        mm6 = p0..p3 - p1..p4
-        psubusw     mm5, mm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     mm6, mm5              ;
-        mm6 = abs(p0..p3 - p1..p4)
-        pcmpgtw     mm6, mm2
-        por         mm7, mm6              ;
-        accumulate thresholds
-
-
-        movq        mm6, [ebx ]
-        movq        mm4, [edi+edx-2]      ;
-        mm4 = p-2..p5
-        movq        mm5, mm4              ;
-        mm5 = p-2..p5
-        punpcklbw   mm5, mm0              ;
-        mm5 = p-2..p1
-        pmullw      mm6, mm5              ;
-        mm5 *= kernel 0 modifiers
-        paddusw     mm3, mm6              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movq        mm6, mm1              ;
-        mm6 = p0..p3
-        psubusw     mm6, mm5              ;
-        mm6 = p0..p3 - p1..p4
-        psubusw     mm5, mm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     mm6, mm5              ;
-        mm6 = abs(p0..p3 - p1..p4)
-        pcmpgtw     mm6, mm2
-        por         mm7, mm6              ;
-        accumulate thresholds
-
-        movq        mm6, [ebx + 16]
-        psrlq       mm4, 8                ;
-        mm4 = p-1..p5
-        punpcklbw   mm4, mm0              ;
-        mm4 = p-1..p2
-        pmullw      mm6, mm4              ;
-        mm4 *= kernel 1 modifiers.
-        paddusw     mm3, mm6              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movq        mm6, mm1              ;
-        mm6 = p0..p3
-        psubusw     mm6, mm4              ;
-        mm6 = p0..p3 - p1..p4
-        psubusw     mm4, mm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     mm6, mm4              ;
-        mm6 = abs(p0..p3 - p1..p4)
-        pcmpgtw     mm6, mm2
-        por         mm7, mm6              ;
-        accumulate thresholds
-
-        paddusw     mm3, rd               ;
-        mm3 += round value
-        psraw       mm3, VP8_FILTER_SHIFT     ;
-        mm3 /= 128
-
-        pand        mm1, mm7              ;
-        mm1 select vals > thresh from source
-        pandn       mm7, mm3              ;
-        mm7 select vals < thresh from blurred result
-        paddusw     mm1, mm7              ;
-        combination
-
-        packuswb    mm1, mm0              ;
-        pack to bytes
-        mov         DWORD PTR [edi+edx-4],  eax   ;
-        store previous four bytes
-        movd        eax,    mm1
-
-        add         edx, 4
-        cmp         edx, cols
-        jl          acrossnextcol;
-
-        mov         DWORD PTR [edi+edx-4],  eax
-        pop         eax
-
-        // done with this rwo
-        add         esi, eax               ;
-        next line
-        mov         eax, dst_pixels_per_line ;
-        destination pitch?
-        add         edi, eax               ;
-        next destination
-        mov         eax, src_pixels_per_line ;
-        destination pitch?
-
-        dec         ecx                   ;
-        decrement count
-        jnz         nextrow               ;
-        next row
-        pop         ebx
-
-    }
-}
-
-
-
-void vp8_post_proc_down_and_across_xmm
-(
-    unsigned char *src_ptr,
-    unsigned char *dst_ptr,
-    int src_pixels_per_line,
-    int dst_pixels_per_line,
-    int rows,
-    int cols,
-    int flimit
-)
-{
-#ifdef RELOCATEABLE
-    R4D2
-#endif
-
-    __asm
-    {
-        movd        xmm2,       flimit
-        punpcklwd   xmm2,       xmm2
-        punpckldq   xmm2,       xmm2
-        punpcklqdq  xmm2,       xmm2
-
-        mov         esi,        src_ptr
-        mov         edi,        dst_ptr
-
-        mov         ecx,        DWORD PTR rows
-        mov         eax,        src_pixels_per_line ;
-        destination pitch?
-        pxor        xmm0,       xmm0              ;
-        mm0 = 00000000
-
-        nextrow:
-
-        xor         edx,        edx       ;
-
-        clear out edx for use as loop counter
-        nextcol:
-        movq        xmm3,       QWORD PTR [esi]         ;
-
-        mm4 = r0 p0..p7
-        punpcklbw   xmm3,       xmm0                    ;
-        mm3 = p0..p3
-        movdqa      xmm1,       xmm3                    ;
-        mm1 = p0..p3
-        psllw       xmm3,       2                       ;
-
-        movq        xmm5,       QWORD PTR [esi + eax]   ;
-        mm4 = r1 p0..p7
-        punpcklbw   xmm5,       xmm0                    ;
-        mm5 = r1 p0..p3
-        paddusw     xmm3,       xmm5                    ;
-        mm3 += mm6
-
-        ;
-        thresholding
-        movdqa      xmm7,       xmm1                    ;
-        mm7 = r0 p0..p3
-        psubusw     xmm7,       xmm5                    ;
-        mm7 = r0 p0..p3 - r1 p0..p3
-        psubusw     xmm5,       xmm1                    ;
-        mm5 = r1 p0..p3 - r0 p0..p3
-        paddusw     xmm7,       xmm5                    ;
-        mm7 = abs(r0 p0..p3 - r1 p0..p3)
-        pcmpgtw     xmm7,       xmm2
-
-        movq        xmm5,       QWORD PTR [esi + 2*eax] ;
-        mm4 = r2 p0..p7
-        punpcklbw   xmm5,       xmm0                    ;
-        mm5 = r2 p0..p3
-        paddusw     xmm3,       xmm5                    ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movdqa      xmm6,       xmm1                    ;
-        mm6 = r0 p0..p3
-        psubusw     xmm6,       xmm5                    ;
-        mm6 = r0 p0..p3 - r2 p0..p3
-        psubusw     xmm5,       xmm1                    ;
-        mm5 = r2 p0..p3 - r2 p0..p3
-        paddusw     xmm6,       xmm5                    ;
-        mm6 = abs(r0 p0..p3 - r2 p0..p3)
-        pcmpgtw     xmm6,       xmm2
-        por         xmm7,       xmm6                    ;
-        accumulate thresholds
-
-
-        neg         eax
-        movq        xmm5,       QWORD PTR [esi+2*eax]   ;
-        mm4 = r-2 p0..p7
-        punpcklbw   xmm5,       xmm0                    ;
-        mm5 = r-2 p0..p3
-        paddusw     xmm3,       xmm5                    ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movdqa      xmm6,       xmm1                    ;
-        mm6 = r0 p0..p3
-        psubusw     xmm6,       xmm5                    ;
-        mm6 = p0..p3 - r-2 p0..p3
-        psubusw     xmm5,       xmm1                    ;
-        mm5 = r-2 p0..p3 - p0..p3
-        paddusw     xmm6,       xmm5                    ;
-        mm6 = abs(r0 p0..p3 - r-2 p0..p3)
-        pcmpgtw     xmm6,       xmm2
-        por         xmm7,       xmm6                    ;
-        accumulate thresholds
-
-        movq        xmm4,       QWORD PTR [esi+eax]     ;
-        mm4 = r-1 p0..p7
-        punpcklbw   xmm4,       xmm0                    ;
-        mm4 = r-1 p0..p3
-        paddusw     xmm3,       xmm4                    ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movdqa      xmm6,       xmm1                    ;
-        mm6 = r0 p0..p3
-        psubusw     xmm6,       xmm4                    ;
-        mm6 = p0..p3 - r-2 p0..p3
-        psubusw     xmm4,       xmm1                    ;
-        mm5 = r-1 p0..p3 - p0..p3
-        paddusw     xmm6,       xmm4                    ;
-        mm6 = abs(r0 p0..p3 - r-1 p0..p3)
-        pcmpgtw     xmm6,       xmm2
-        por         xmm7,       xmm6                    ;
-        accumulate thresholds
-
-
-        paddusw     xmm3,       rd42                    ;
-        mm3 += round value
-        psraw       xmm3,       3                       ;
-        mm3 /= 8
-
-        pand        xmm1,       xmm7                    ;
-        mm1 select vals > thresh from source
-        pandn       xmm7,       xmm3                    ;
-        mm7 select vals < thresh from blurred result
-        paddusw     xmm1,       xmm7                    ;
-        combination
-
-        packuswb    xmm1,       xmm0                    ;
-        pack to bytes
-        movq        QWORD PTR [edi], xmm1             ;
-
-        neg         eax                   ;
-        pitch is positive
-        add         esi,        8
-        add         edi,        8
-
-        add         edx,        8
-        cmp         edx,        cols
-
-        jl          nextcol
-
-        // done with the all cols, start the across filtering in place
-        sub         esi,        edx
-        sub         edi,        edx
-
-        xor         edx,        edx
-        movq        mm0,        QWORD PTR [edi-8];
-
-        acrossnextcol:
-        movq        xmm7,       QWORD PTR [edi +edx -2]
-        movd        xmm4,       DWORD PTR [edi +edx +6]
-
-        pslldq      xmm4,       8
-        por         xmm4,       xmm7
-
-        movdqa      xmm3,       xmm4
-        psrldq      xmm3,       2
-        punpcklbw   xmm3,       xmm0              ;
-        mm3 = p0..p3
-        movdqa      xmm1,       xmm3              ;
-        mm1 = p0..p3
-        psllw       xmm3,       2
-
-
-        movdqa      xmm5,       xmm4
-        psrldq      xmm5,       3
-        punpcklbw   xmm5,       xmm0              ;
-        mm5 = p1..p4
-        paddusw     xmm3,       xmm5              ;
-        mm3 += mm6
-
-        ;
-        thresholding
-        movdqa      xmm7,       xmm1              ;
-        mm7 = p0..p3
-        psubusw     xmm7,       xmm5              ;
-        mm7 = p0..p3 - p1..p4
-        psubusw     xmm5,       xmm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     xmm7,       xmm5              ;
-        mm7 = abs(p0..p3 - p1..p4)
-        pcmpgtw     xmm7,       xmm2
-
-        movdqa      xmm5,       xmm4
-        psrldq      xmm5,       4
-        punpcklbw   xmm5,       xmm0              ;
-        mm5 = p2..p5
-        paddusw     xmm3,       xmm5              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movdqa      xmm6,       xmm1              ;
-        mm6 = p0..p3
-        psubusw     xmm6,       xmm5              ;
-        mm6 = p0..p3 - p1..p4
-        psubusw     xmm5,       xmm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     xmm6,       xmm5              ;
-        mm6 = abs(p0..p3 - p1..p4)
-        pcmpgtw     xmm6,       xmm2
-        por         xmm7,       xmm6              ;
-        accumulate thresholds
-
-
-        movdqa      xmm5,       xmm4              ;
-        mm5 = p-2..p5
-        punpcklbw   xmm5,       xmm0              ;
-        mm5 = p-2..p1
-        paddusw     xmm3,       xmm5              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movdqa      xmm6,       xmm1              ;
-        mm6 = p0..p3
-        psubusw     xmm6,       xmm5              ;
-        mm6 = p0..p3 - p1..p4
-        psubusw     xmm5,       xmm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     xmm6,       xmm5              ;
-        mm6 = abs(p0..p3 - p1..p4)
-        pcmpgtw     xmm6,       xmm2
-        por         xmm7,       xmm6              ;
-        accumulate thresholds
-
-        psrldq      xmm4,       1                   ;
-        mm4 = p-1..p5
-        punpcklbw   xmm4,       xmm0              ;
-        mm4 = p-1..p2
-        paddusw     xmm3,       xmm4              ;
-        mm3 += mm5
-
-        ;
-        thresholding
-        movdqa      xmm6,       xmm1              ;
-        mm6 = p0..p3
-        psubusw     xmm6,       xmm4              ;
-        mm6 = p0..p3 - p1..p4
-        psubusw     xmm4,       xmm1              ;
-        mm5 = p1..p4 - p0..p3
-        paddusw     xmm6,       xmm4              ;
-        mm6 = abs(p0..p3 - p1..p4)
-        pcmpgtw     xmm6,       xmm2
-        por         xmm7,       xmm6              ;
-        accumulate thresholds
-
-        paddusw     xmm3,       rd42              ;
-        mm3 += round value
-        psraw       xmm3,       3                 ;
-        mm3 /= 8
-
-        pand        xmm1,       xmm7              ;
-        mm1 select vals > thresh from source
-        pandn       xmm7,       xmm3              ;
-        mm7 select vals < thresh from blurred result
-        paddusw     xmm1,       xmm7              ;
-        combination
-
-        packuswb    xmm1,       xmm0              ;
-        pack to bytes
-        movq        QWORD PTR [edi+edx-8],  mm0   ;
-        store previous four bytes
-        movdq2q     mm0,        xmm1
-
-        add         edx,        8
-        cmp         edx,        cols
-        jl          acrossnextcol;
-
-        // last 8 pixels
-        movq        QWORD PTR [edi+edx-8],  mm0
-
-        // done with this rwo
-        add         esi, eax               ;
-        next line
-        mov         eax, dst_pixels_per_line ;
-        destination pitch?
-        add         edi, eax               ;
-        next destination
-        mov         eax, src_pixels_per_line ;
-        destination pitch?
-
-        dec         ecx                   ;
-        decrement count
-        jnz         nextrow               ;
-        next row
-    }
-}
-
-
-void vp8_mbpost_proc_down_mmx(unsigned char *dst, int pitch, int rows, int cols, int flimit)
-{
-    int c, i;
-    __declspec(align(16))
-    int flimit2[2];
-    __declspec(align(16))
-    unsigned char d[16][8];
-
-    flimit = vp8_q2mbl(flimit);
-
-    for (i = 0; i < 2; i++)
-        flimit2[i] = flimit;
-
-    rows += 8;
-
-    for (c = 0; c < cols; c += 4)
-    {
-        unsigned char *s = &dst[c];
-
-        __asm
-        {
-            mov         esi,        s           ;
-            pxor        mm0,        mm0     ;
-
-            mov         eax,        pitch       ;
-            neg         eax                                     // eax = -pitch
-
-            lea         esi,        [esi + eax*8];              // edi = s[-pitch*8]
-            neg         eax
-
-
-            pxor        mm5,        mm5
-            pxor        mm6,        mm6     ;
-
-            pxor        mm7,        mm7     ;
-            mov         edi,        esi
-
-            mov         ecx,        15          ;
-
-            loop_initvar:
-            movd        mm1,        DWORD PTR [edi];
-            punpcklbw   mm1,        mm0     ;
-
-            paddw       mm5,        mm1     ;
-            pmullw      mm1,        mm1     ;
-
-            movq        mm2,        mm1     ;
-            punpcklwd   mm1,        mm0     ;
-
-            punpckhwd   mm2,        mm0     ;
-            paddd       mm6,        mm1     ;
-
-            paddd       mm7,        mm2     ;
-            lea         edi,        [edi+eax]   ;
-
-            dec         ecx
-            jne         loop_initvar
-            //save the var and sum
-            xor         edx,        edx
-            loop_row:
-            movd        mm1,        DWORD PTR [esi]     // [s-pitch*8]
-            movd        mm2,        DWORD PTR [edi]     // [s+pitch*7]
-
-            punpcklbw   mm1,        mm0
-            punpcklbw   mm2,        mm0
-
-            paddw       mm5,        mm2
-            psubw       mm5,        mm1
-
-            pmullw      mm2,        mm2
-            movq        mm4,        mm2
-
-            punpcklwd   mm2,        mm0
-            punpckhwd   mm4,        mm0
-
-            paddd       mm6,        mm2
-            paddd       mm7,        mm4
-
-            pmullw      mm1,        mm1
-            movq        mm2,        mm1
-
-            punpcklwd   mm1,        mm0
-            psubd       mm6,        mm1
-
-            punpckhwd   mm2,        mm0
-            psubd       mm7,        mm2
-
-
-            movq        mm3,        mm6
-            pslld       mm3,        4
-
-            psubd       mm3,        mm6
-            movq        mm1,        mm5
-
-            movq        mm4,        mm5
-            pmullw      mm1,        mm1
-
-            pmulhw      mm4,        mm4
-            movq        mm2,        mm1
-
-            punpcklwd   mm1,        mm4
-            punpckhwd   mm2,        mm4
-
-            movq        mm4,        mm7
-            pslld       mm4,        4
-
-            psubd       mm4,        mm7
-
-            psubd       mm3,        mm1
-            psubd       mm4,        mm2
-
-            psubd       mm3,        flimit2
-            psubd       mm4,        flimit2
-
-            psrad       mm3,        31
-            psrad       mm4,        31
-
-            packssdw    mm3,        mm4
-            packsswb    mm3,        mm0
-
-            movd        mm1,        DWORD PTR [esi+eax*8]
-
-            movq        mm2,        mm1
-            punpcklbw   mm1,        mm0
-
-            paddw       mm1,        mm5
-            mov         ecx,        edx
-
-            and         ecx,        127
-            movq        mm4,        vp8_rv[ecx*2]
-
-            paddw       mm1,        mm4
-            //paddw     xmm1,       eight8s
-            psraw       mm1,        4
-
-            packuswb    mm1,        mm0
-            pand        mm1,        mm3
-
-            pandn       mm3,        mm2
-            por         mm1,        mm3
-
-            and         ecx,        15
-            movd        DWORD PTR  d[ecx*4], mm1
-
-            mov         ecx,        edx
-            sub         ecx,        8
-
-            and         ecx,        15
-            movd        mm1,        DWORD PTR d[ecx*4]
-
-            movd        [esi],      mm1
-            lea         esi,        [esi+eax]
-
-            lea         edi,        [edi+eax]
-            add         edx,        1
-
-            cmp         edx,        rows
-            jl          loop_row
-
-        }
-
-    }
-}
-
-void vp8_mbpost_proc_down_xmm(unsigned char *dst, int pitch, int rows, int cols, int flimit)
-{
-    int c, i;
-    __declspec(align(16))
-    int flimit4[4];
-    __declspec(align(16))
-    unsigned char d[16][8];
-
-    flimit = vp8_q2mbl(flimit);
-
-    for (i = 0; i < 4; i++)
-        flimit4[i] = flimit;
-
-    rows += 8;
-
-    for (c = 0; c < cols; c += 8)
-    {
-        unsigned char *s = &dst[c];
-
-        __asm
-        {
-            mov         esi,        s           ;
-            pxor        xmm0,       xmm0        ;
-
-            mov         eax,        pitch       ;
-            neg         eax                                     // eax = -pitch
-
-            lea         esi,        [esi + eax*8];              // edi = s[-pitch*8]
-            neg         eax
-
-
-            pxor        xmm5,       xmm5
-            pxor        xmm6,       xmm6        ;
-
-            pxor        xmm7,       xmm7        ;
-            mov         edi,        esi
-
-            mov         ecx,        15          ;
-
-            loop_initvar:
-            movq        xmm1,       QWORD PTR [edi];
-            punpcklbw   xmm1,       xmm0        ;
-
-            paddw       xmm5,       xmm1        ;
-            pmullw      xmm1,       xmm1        ;
-
-            movdqa      xmm2,       xmm1        ;
-            punpcklwd   xmm1,       xmm0        ;
-
-            punpckhwd   xmm2,       xmm0        ;
-            paddd       xmm6,       xmm1        ;
-
-            paddd       xmm7,       xmm2        ;
-            lea         edi,        [edi+eax]   ;
-
-            dec         ecx
-            jne         loop_initvar
-            //save the var and sum
-            xor         edx,        edx
-            loop_row:
-            movq        xmm1,       QWORD PTR [esi]     // [s-pitch*8]
-            movq        xmm2,       QWORD PTR [edi]     // [s+pitch*7]
-
-            punpcklbw   xmm1,       xmm0
-            punpcklbw   xmm2,       xmm0
-
-            paddw       xmm5,       xmm2
-            psubw       xmm5,       xmm1
-
-            pmullw      xmm2,       xmm2
-            movdqa      xmm4,       xmm2
-
-            punpcklwd   xmm2,       xmm0
-            punpckhwd   xmm4,       xmm0
-
-            paddd       xmm6,       xmm2
-            paddd       xmm7,       xmm4
-
-            pmullw      xmm1,       xmm1
-            movdqa      xmm2,       xmm1
-
-            punpcklwd   xmm1,       xmm0
-            psubd       xmm6,       xmm1
-
-            punpckhwd   xmm2,       xmm0
-            psubd       xmm7,       xmm2
-
-
-            movdqa      xmm3,       xmm6
-            pslld       xmm3,       4
-
-            psubd       xmm3,       xmm6
-            movdqa      xmm1,       xmm5
-
-            movdqa      xmm4,       xmm5
-            pmullw      xmm1,       xmm1
-
-            pmulhw      xmm4,       xmm4
-            movdqa      xmm2,       xmm1
-
-            punpcklwd   xmm1,       xmm4
-            punpckhwd   xmm2,       xmm4
-
-            movdqa      xmm4,       xmm7
-            pslld       xmm4,       4
-
-            psubd       xmm4,       xmm7
-
-            psubd       xmm3,       xmm1
-            psubd       xmm4,       xmm2
-
-            psubd       xmm3,       flimit4
-            psubd       xmm4,       flimit4
-
-            psrad       xmm3,       31
-            psrad       xmm4,       31
-
-            packssdw    xmm3,       xmm4
-            packsswb    xmm3,       xmm0
-
-            movq        xmm1,       QWORD PTR [esi+eax*8]
-
-            movq        xmm2,       xmm1
-            punpcklbw   xmm1,       xmm0
-
-            paddw       xmm1,       xmm5
-            mov         ecx,        edx
-
-            and         ecx,        127
-            movdqu      xmm4,       vp8_rv[ecx*2]
-
-            paddw       xmm1,       xmm4
-            //paddw     xmm1,       eight8s
-            psraw       xmm1,       4
-
-            packuswb    xmm1,       xmm0
-            pand        xmm1,       xmm3
-
-            pandn       xmm3,       xmm2
-            por         xmm1,       xmm3
-
-            and         ecx,        15
-            movq        QWORD PTR  d[ecx*8], xmm1
-
-            mov         ecx,        edx
-            sub         ecx,        8
-
-            and         ecx,        15
-            movq        mm0,        d[ecx*8]
-
-            movq        [esi],      mm0
-            lea         esi,        [esi+eax]
-
-            lea         edi,        [edi+eax]
-            add         edx,        1
-
-            cmp         edx,        rows
-            jl          loop_row
-
-        }
-
-    }
-}
-#if 0
-/****************************************************************************
- *
- *  ROUTINE       : plane_add_noise_wmt
- *
- *  INPUTS        : unsigned char *Start    starting address of buffer to add gaussian
- *                                  noise to
- *                  unsigned int Width    width of plane
- *                  unsigned int Height   height of plane
- *                  int  Pitch    distance between subsequent lines of frame
- *                  int  q        quantizer used to determine amount of noise
- *                                  to add
- *
- *  OUTPUTS       : None.
- *
- *  RETURNS       : void.
- *
- *  FUNCTION      : adds gaussian noise to a plane of pixels
- *
- *  SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void vp8_plane_add_noise_wmt(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a)
-{
-    unsigned int i;
-
-    __declspec(align(16)) unsigned char blackclamp[16];
-    __declspec(align(16)) unsigned char whiteclamp[16];
-    __declspec(align(16)) unsigned char bothclamp[16];
-    char char_dist[300];
-    char Rand[2048];
-    double sigma;
-//    return;
-    __asm emms
-    sigma = a + .5 + .6 * (63 - q) / 63.0;
-
-    // set up a lookup table of 256 entries that matches
-    // a gaussian distribution with sigma determined by q.
-    //
-    {
-        double i;
-        int next, j;
-
-        next = 0;
-
-        for (i = -32; i < 32; i++)
-        {
-            double g = 256 * vp8_gaussian(sigma, 0, 1.0 * i);
-            int a = (int)(g + .5);
-
-            if (a)
-            {
-                for (j = 0; j < a; j++)
-                {
-                    char_dist[next+j] = (char) i;
-                }
-
-                next = next + j;
-            }
-
-        }
-
-        for (next = next; next < 256; next++)
-            char_dist[next] = 0;
-
-    }
-
-    for (i = 0; i < 2048; i++)
-    {
-        Rand[i] = char_dist[rand() & 0xff];
-    }
-
-    for (i = 0; i < 16; i++)
-    {
-        blackclamp[i] = -char_dist[0];
-        whiteclamp[i] = -char_dist[0];
-        bothclamp[i] = -2 * char_dist[0];
-    }
-
-    for (i = 0; i < Height; i++)
-    {
-        unsigned char *Pos = Start + i * Pitch;
-        char  *Ref = Rand + (rand() & 0xff);
-
-        __asm
-        {
-            mov ecx, [Width]
-            mov esi, Pos
-            mov edi, Ref
-            xor         eax, eax
-
-            nextset:
-            movdqu      xmm1, [esi+eax]        // get the source
-
-            psubusb     xmm1, blackclamp       // clamp both sides so we don't outrange adding noise
-            paddusb     xmm1, bothclamp
-            psubusb     xmm1, whiteclamp
-
-            movdqu      xmm2, [edi+eax]        // get the noise for this line
-            paddb       xmm1, xmm2             // add it in
-            movdqu      [esi+eax], xmm1        // store the result
-
-            add         eax, 16                // move to the next line
-
-            cmp         eax, ecx
-            jl          nextset
-
-
-        }
-
-    }
-}
-#endif
-__declspec(align(16))
-static const int four8s[4] = { 8, 8, 8, 8};
-void vp8_mbpost_proc_across_ip_xmm(unsigned char *src, int pitch, int rows, int cols, int flimit)
-{
-    int r, i;
-    __declspec(align(16))
-    int flimit4[4];
-    unsigned char *s = src;
-    int sumsq;
-    int sum;
-
-
-    flimit = vp8_q2mbl(flimit);
-    flimit4[0] =
-        flimit4[1] =
-            flimit4[2] =
-                flimit4[3] = flimit;
-
-    for (r = 0; r < rows; r++)
-    {
-
-
-        sumsq = 0;
-        sum = 0;
-
-        for (i = -8; i <= 6; i++)
-        {
-            sumsq += s[i] * s[i];
-            sum   += s[i];
-        }
-
-        __asm
-        {
-            mov         eax,    sumsq
-            movd        xmm7,   eax
-
-            mov         eax,    sum
-            movd        xmm6,   eax
-
-            mov         esi,    s
-            xor         ecx,    ecx
-
-            mov         edx,    cols
-            add         edx,    8
-            pxor        mm0,    mm0
-            pxor        mm1,    mm1
-
-            pxor        xmm0,   xmm0
-            nextcol4:
-
-            movd        xmm1,   DWORD PTR [esi+ecx-8]   // -8 -7 -6 -5
-            movd        xmm2,   DWORD PTR [esi+ecx+7]   // +7 +8 +9 +10
-
-            punpcklbw   xmm1,   xmm0                    // expanding
-            punpcklbw   xmm2,   xmm0                    // expanding
-
-            punpcklwd   xmm1,   xmm0                    // expanding to dwords
-            punpcklwd   xmm2,   xmm0                    // expanding to dwords
-
-            psubd       xmm2,   xmm1                    // 7--8   8--7   9--6 10--5
-            paddd       xmm1,   xmm1                    // -8*2   -7*2   -6*2 -5*2
-
-            paddd       xmm1,   xmm2                    // 7+-8   8+-7   9+-6 10+-5
-            pmaddwd     xmm1,   xmm2                    // squared of 7+-8   8+-7   9+-6 10+-5
-
-            paddd       xmm6,   xmm2
-            paddd       xmm7,   xmm1
-
-            pshufd      xmm6,   xmm6,   0               // duplicate the last ones
-            pshufd      xmm7,   xmm7,   0               // duplicate the last ones
-
-            psrldq      xmm1,       4                   // 8--7   9--6 10--5  0000
-            psrldq      xmm2,       4                   // 8--7   9--6 10--5  0000
-
-            pshufd      xmm3,   xmm1,   3               // 0000  8--7   8--7   8--7 squared
-            pshufd      xmm4,   xmm2,   3               // 0000  8--7   8--7   8--7 squared
-
-            paddd       xmm6,   xmm4
-            paddd       xmm7,   xmm3
-
-            pshufd      xmm3,   xmm1,   01011111b       // 0000  0000   9--6   9--6 squared
-            pshufd      xmm4,   xmm2,   01011111b       // 0000  0000   9--6   9--6 squared
-
-            paddd       xmm7,   xmm3
-            paddd       xmm6,   xmm4
-
-            pshufd      xmm3,   xmm1,   10111111b       // 0000  0000   8--7   8--7 squared
-            pshufd      xmm4,   xmm2,   10111111b       // 0000  0000   8--7   8--7 squared
-
-            paddd       xmm7,   xmm3
-            paddd       xmm6,   xmm4
-
-            movdqa      xmm3,   xmm6
-            pmaddwd     xmm3,   xmm3
-
-            movdqa      xmm5,   xmm7
-            pslld       xmm5,   4
-
-            psubd       xmm5,   xmm7
-            psubd       xmm5,   xmm3
-
-            psubd       xmm5,   flimit4
-            psrad       xmm5,   31
-
-            packssdw    xmm5,   xmm0
-            packsswb    xmm5,   xmm0
-
-            movd        xmm1,   DWORD PTR [esi+ecx]
-            movq        xmm2,   xmm1
-
-            punpcklbw   xmm1,   xmm0
-            punpcklwd   xmm1,   xmm0
-
-            paddd       xmm1,   xmm6
-            paddd       xmm1,   four8s
-
-            psrad       xmm1,   4
-            packssdw    xmm1,   xmm0
-
-            packuswb    xmm1,   xmm0
-            pand        xmm1,   xmm5
-
-            pandn       xmm5,   xmm2
-            por         xmm5,   xmm1
-
-            movd        [esi+ecx-8],  mm0
-            movq        mm0,    mm1
-
-            movdq2q     mm1,    xmm5
-            psrldq      xmm7,   12
-
-            psrldq      xmm6,   12
-            add         ecx,    4
-
-            cmp         ecx,    edx
-            jl          nextcol4
-
-        }
-        s += pitch;
-    }
-}
-
-#if 0
-
-/****************************************************************************
- *
- *  ROUTINE       : plane_add_noise_mmx
- *
- *  INPUTS        : unsigned char *Start    starting address of buffer to add gaussian
- *                                  noise to
- *                  unsigned int Width    width of plane
- *                  unsigned int Height   height of plane
- *                  int  Pitch    distance between subsequent lines of frame
- *                  int  q        quantizer used to determine amount of noise
- *                                  to add
- *
- *  OUTPUTS       : None.
- *
- *  RETURNS       : void.
- *
- *  FUNCTION      : adds gaussian noise to a plane of pixels
- *
- *  SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void vp8_plane_add_noise_mmx(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a)
-{
-    unsigned int i;
-    int Pitch4 = Pitch * 4;
-    const int noise_amount = 2;
-    const int noise_adder = 2 * noise_amount + 1;
-
-    __declspec(align(16)) unsigned char blackclamp[16];
-    __declspec(align(16)) unsigned char whiteclamp[16];
-    __declspec(align(16)) unsigned char bothclamp[16];
-
-    char char_dist[300];
-    char Rand[2048];
-
-    double sigma;
-    __asm emms
-    sigma = a + .5 + .6 * (63 - q) / 63.0;
-
-    // set up a lookup table of 256 entries that matches
-    // a gaussian distribution with sigma determined by q.
-    //
-    {
-        double i, sum = 0;
-        int next, j;
-
-        next = 0;
-
-        for (i = -32; i < 32; i++)
-        {
-            int a = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));
-
-            if (a)
-            {
-                for (j = 0; j < a; j++)
-                {
-                    char_dist[next+j] = (char) i;
-                }
-
-                next = next + j;
-            }
-
-        }
-
-        for (next = next; next < 256; next++)
-            char_dist[next] = 0;
-
-    }
-
-    for (i = 0; i < 2048; i++)
-    {
-        Rand[i] = char_dist[rand() & 0xff];
-    }
-
-    for (i = 0; i < 16; i++)
-    {
-        blackclamp[i] = -char_dist[0];
-        whiteclamp[i] = -char_dist[0];
-        bothclamp[i] = -2 * char_dist[0];
-    }
-
-    for (i = 0; i < Height; i++)
-    {
-        unsigned char *Pos = Start + i * Pitch;
-        char  *Ref = Rand + (rand() & 0xff);
-
-        __asm
-        {
-            mov ecx, [Width]
-            mov esi, Pos
-            mov edi, Ref
-            xor         eax, eax
-
-            nextset:
-            movq        mm1, [esi+eax]        // get the source
-
-            psubusb     mm1, blackclamp       // clamp both sides so we don't outrange adding noise
-            paddusb     mm1, bothclamp
-            psubusb     mm1, whiteclamp
-
-            movq        mm2, [edi+eax]        // get the noise for this line
-            paddb       mm1, mm2             // add it in
-            movq        [esi+eax], mm1        // store the result
-
-            add         eax, 8                // move to the next line
-
-            cmp         eax, ecx
-            jl          nextset
-
-
-        }
-
-    }
-}
-#else
-extern char an[8][64][3072];
-extern int cd[8][64];
-
-void vp8_plane_add_noise_mmx(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a)
-{
-    unsigned int i;
-    __declspec(align(16)) unsigned char blackclamp[16];
-    __declspec(align(16)) unsigned char whiteclamp[16];
-    __declspec(align(16)) unsigned char bothclamp[16];
-
-
-    __asm emms
-
-    for (i = 0; i < 16; i++)
-    {
-        blackclamp[i] = -cd[a][q];
-        whiteclamp[i] = -cd[a][q];
-        bothclamp[i] = -2 * cd[a][q];
-    }
-
-    for (i = 0; i < Height; i++)
-    {
-        unsigned char *Pos = Start + i * Pitch;
-        char  *Ref = an[a][q] + (rand() & 0xff);
-
-        __asm
-        {
-            mov ecx, [Width]
-            mov esi, Pos
-            mov edi, Ref
-            xor         eax, eax
-
-            nextset:
-            movq        mm1, [esi+eax]        // get the source
-
-            psubusb     mm1, blackclamp       // clamp both sides so we don't outrange adding noise
-            paddusb     mm1, bothclamp
-            psubusb     mm1, whiteclamp
-
-            movq        mm2, [edi+eax]        // get the noise for this line
-            paddb       mm1, mm2             // add it in
-            movq        [esi+eax], mm1        // store the result
-
-            add         eax, 8                // move to the next line
-
-            cmp         eax, ecx
-            jl          nextset
-        }
-    }
-}
-
-
-void vp8_plane_add_noise_wmt(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a)
-{
-    unsigned int i;
-
-    __declspec(align(16)) unsigned char blackclamp[16];
-    __declspec(align(16)) unsigned char whiteclamp[16];
-    __declspec(align(16)) unsigned char bothclamp[16];
-
-    __asm emms
-
-    for (i = 0; i < 16; i++)
-    {
-        blackclamp[i] = -cd[a][q];
-        whiteclamp[i] = -cd[a][q];
-        bothclamp[i] = -2 * cd[a][q];
-    }
-
-    for (i = 0; i < Height; i++)
-    {
-        unsigned char *Pos = Start + i * Pitch;
-        char *Ref = an[a][q] + (rand() & 0xff);
-
-        __asm
-        {
-            mov ecx,    [Width]
-            mov esi,    Pos
-            mov edi,    Ref
-            xor         eax, eax
-
-            nextset:
-            movdqu      xmm1, [esi+eax]        // get the source
-
-            psubusb     xmm1, blackclamp       // clamp both sides so we don't outrange adding noise
-            paddusb     xmm1, bothclamp
-            psubusb     xmm1, whiteclamp
-
-            movdqu      xmm2, [edi+eax]        // get the noise for this line
-            paddb       xmm1, xmm2             // add it in
-            movdqu      [esi+eax], xmm1        // store the result
-
-            add         eax, 16                // move to the next line
-
-            cmp         eax, ecx
-            jl          nextset
-        }
-    }
-}
-
-#endif
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
index 4c88db474b75a9843f681e09dfcdf5b5af04a48a..3a48068622af43ea99c6f5fd0f368d4462c4a9ba 100644
--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -12,17 +12,17 @@
 #include "vp8/common/idct.h"
 #include "vp8/decoder/dequantize.h"
 
-void idct_dequant_dc_0_2x_sse2
+void vp8_idct_dequant_dc_0_2x_sse2
             (short *q, short *dq, unsigned char *pre,
              unsigned char *dst, int dst_stride, short *dc);
-void idct_dequant_dc_full_2x_sse2
+void vp8_idct_dequant_dc_full_2x_sse2
             (short *q, short *dq, unsigned char *pre,
              unsigned char *dst, int dst_stride, short *dc);
 
-void idct_dequant_0_2x_sse2
+void vp8_idct_dequant_0_2x_sse2
             (short *q, short *dq ,unsigned char *pre,
              unsigned char *dst, int dst_stride, int blk_stride);
-void idct_dequant_full_2x_sse2
+void vp8_idct_dequant_full_2x_sse2
             (short *q, short *dq ,unsigned char *pre,
              unsigned char *dst, int dst_stride, int blk_stride);
 
@@ -35,14 +35,14 @@ void vp8_dequant_dc_idct_add_y_block_sse2
     for (i = 0; i < 4; i++)
     {
         if (((short *)(eobs))[0] & 0xfefe)
-            idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
+            vp8_idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
         else
-            idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
+            vp8_idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
 
         if (((short *)(eobs))[1] & 0xfefe)
-            idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+            vp8_idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
         else
-            idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+            vp8_idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
 
         q    += 64;
         dc   += 4;
@@ -61,14 +61,14 @@ void vp8_dequant_idct_add_y_block_sse2
     for (i = 0; i < 4; i++)
     {
         if (((short *)(eobs))[0] & 0xfefe)
-            idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
+            vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
         else
-            idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
+            vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
 
         if (((short *)(eobs))[1] & 0xfefe)
-            idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+            vp8_idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
         else
-            idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+            vp8_idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
 
         q    += 64;
         pre  += 64;
@@ -82,33 +82,33 @@ void vp8_dequant_idct_add_uv_block_sse2
              unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
 {
     if (((short *)(eobs))[0] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
 
     q    += 32;
     pre  += 32;
     dstu += stride*4;
 
     if (((short *)(eobs))[1] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
 
     q    += 32;
     pre  += 32;
 
     if (((short *)(eobs))[2] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
 
     q    += 32;
     pre  += 32;
     dstv += stride*4;
 
     if (((short *)(eobs))[3] & 0xfefe)
-        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
     else
-        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+        vp8_idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
 }
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 64d1c93042c18734afcf4df04ad28868925bc433..e65a7f9d4dda64184400004940d8b2a1fb5a6f96 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -22,7 +22,8 @@
 #include "vpx/vpx_encoder.h"
 #include "vpx_mem/vpx_mem.h"
 #include "bitstream.h"
-#include "vp8/common/defaultcoefcounts.h"
+
+#include "defaultcoefcounts.h"
 #if CONFIG_SEGMENTATION
 static int segment_cost = 0;
 #endif
@@ -1295,7 +1296,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi)
             if (cpi->common.frame_type == KEY_FRAME)
             {
                 /* Reset to default probabilities at key frames */
-                sum_probs_over_prev_coef_context(vp8_default_coef_counts[i][j],
+                sum_probs_over_prev_coef_context(default_coef_counts[i][j],
                                                  prev_coef_count_sum);
             }
             else
diff --git a/vp8/common/defaultcoefcounts.c b/vp8/encoder/defaultcoefcounts.h
similarity index 98%
rename from vp8/common/defaultcoefcounts.c
rename to vp8/encoder/defaultcoefcounts.h
index 34d1fb1d510b16dd8c558f5e19cf3d90343f153d..3b54c823cb29970765e1168c832fa2a0485c048f 100644
--- a/vp8/common/defaultcoefcounts.c
+++ b/vp8/encoder/defaultcoefcounts.h
@@ -8,14 +8,12 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "defaultcoefcounts.h"
-
 /* Generated file, included by entropy.c */
 
-const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
-                                          [COEF_BANDS]
-                                          [PREV_COEF_CONTEXTS]
-                                          [MAX_ENTROPY_TOKENS] =
+static const unsigned int default_coef_counts[BLOCK_TYPES]
+                                             [COEF_BANDS]
+                                             [PREV_COEF_CONTEXTS]
+                                             [MAX_ENTROPY_TOKENS] =
 {
 
     {
@@ -401,4 +399,4 @@ const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES]
       }
     }
   };
-#endif
\ No newline at end of file
+#endif
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 1a883b8ab2eaf8e2cf90da7556234427921bd134..b6e5a41c2b1cb3b3ed9f275b88a7ffd739167aa6 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -19,8 +19,6 @@ VP8_COMMON_SRCS-yes += common/asm_com_offsets.c
 VP8_COMMON_SRCS-yes += common/blockd.c
 VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
 VP8_COMMON_SRCS-yes += common/debugmodes.c
-VP8_COMMON_SRCS-yes += common/defaultcoefcounts.h
-VP8_COMMON_SRCS-yes += common/defaultcoefcounts.c
 VP8_COMMON_SRCS-yes += common/entropy.c
 VP8_COMMON_SRCS-yes += common/entropymode.c
 VP8_COMMON_SRCS-yes += common/entropymv.c
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index d46d99df6626188f2ff2956d52a8f48232db6915..b71a54aea938d906839e1aef1b99aa50c6366a07 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -34,6 +34,7 @@ VP8_CX_SRCS-yes += vp8_cx_iface.c
 #INCLUDES += encoder
 
 VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c
+VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h
 VP8_CX_SRCS-yes += encoder/bitstream.c
 VP8_CX_SRCS-yes += encoder/boolhuff.c
 VP8_CX_SRCS-yes += encoder/dct.c