diff --git a/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/vp8/common/arm/armv6/intra4x4_predict_v6.asm
index 75d52dbeb9c038fb1603b19d643f8de587827296..c5ec824b3409bbe5ccbc0400f78c5db7b39c9b36 100644
--- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm
+++ b/vp8/common/arm/armv6/intra4x4_predict_v6.asm
@@ -18,15 +18,23 @@
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
 
-;void vp8_intra4x4_predict_armv6(unsigned char *src, int src_stride, int b_mode,
-;                                unsigned char *dst, int dst_stride)
-
+;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft,
+;                                int left_stride, B_PREDICTION_MODE b_mode,
+;                                unsigned char *dst, int dst_stride,
+;                                unsigned char top_left)
+
+; r0: *Above
+; r1: *yleft
+; r2: left_stride
+; r3: b_mode
+; sp + #40: dst
+; sp + #44: dst_stride
+; sp + #48: top_left
 |vp8_intra4x4_predict_armv6| PROC
     push        {r4-r12, lr}
 
-
-    cmp         r2, #10
-    addlt       pc, pc, r2, lsl #2       ; position independent switch
+    cmp         r3, #10
+    addlt       pc, pc, r3, lsl #2       ; position independent switch
     pop         {r4-r12, pc}             ; default
     b           b_dc_pred
     b           b_tm_pred
@@ -41,13 +49,13 @@
 
 b_dc_pred
     ; load values
-    ldr         r8, [r0, -r1]            ; Above
-    ldrb        r4, [r0, #-1]!           ; Left[0]
+    ldr         r8, [r0]                 ; Above
+    ldrb        r4, [r1], r2             ; Left[0]
     mov         r9, #0
-    ldrb        r5, [r0, r1]             ; Left[1]
-    ldrb        r6, [r0, r1, lsl #1]!    ; Left[2]
+    ldrb        r5, [r1], r2             ; Left[1]
+    ldrb        r6, [r1], r2             ; Left[2]
     usad8       r12, r8, r9
-    ldrb        r7, [r0, r1]             ; Left[3]
+    ldrb        r7, [r1]                 ; Left[3]
 
     ; calculate dc
     add         r4, r4, r5
@@ -55,31 +63,30 @@ b_dc_pred
     add         r4, r4, r7
     add         r4, r4, r12
     add         r4, r4, #4
-    ldr         r0, [sp, #40]           ; load stride
+    ldr         r0, [sp, #44]           ; dst_stride
     mov         r12, r4, asr #3         ; (expected_dc + 4) >> 3
 
     add         r12, r12, r12, lsl #8
-    add         r3, r3, r0
+    ldr         r3, [sp, #40]           ; dst
     add         r12, r12, r12, lsl #16
 
     ; store values
-    str         r12, [r3, -r0]
+    str         r12, [r3], r0
+    str         r12, [r3], r0
+    str         r12, [r3], r0
     str         r12, [r3]
-    str         r12, [r3, r0]
-    str         r12, [r3, r0, lsl #1]
 
     pop        {r4-r12, pc}
 
 b_tm_pred
-    sub         r10, r0, #1             ; Left
-    ldr         r8, [r0, -r1]           ; Above
-    ldrb        r9, [r10, -r1]          ; top_left
-    ldrb        r4, [r0, #-1]!          ; Left[0]
-    ldrb        r5, [r10, r1]!          ; Left[1]
-    ldrb        r6, [r0, r1, lsl #1]    ; Left[2]
-    ldrb        r7, [r10, r1, lsl #1]   ; Left[3]
-    ldr         r0, [sp, #40]           ; load stride
-
+    ldr         r8, [r0]                ; Above
+    ldrb        r9, [sp, #48]           ; top_left
+    ldrb        r4, [r1], r2            ; Left[0]
+    ldrb        r5, [r1], r2            ; Left[1]
+    ldrb        r6, [r1], r2            ; Left[2]
+    ldrb        r7, [r1]                ; Left[3]
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     add         r9, r9, r9, lsl #16     ; [tl|tl]
     uxtb16      r10, r8                 ; a[2|0]
@@ -126,25 +133,26 @@ b_tm_pred
     str         r12, [r3], r0
 
     add         r12, r4, r5, lsl #8     ; [3|2|1|0]
-    str         r12, [r3], r0
+    str         r12, [r3]
 
     pop        {r4-r12, pc}
 
 b_ve_pred
-    ldr         r8, [r0, -r1]!          ; a[3|2|1|0]
+    ldr         r8, [r0]                ; a[3|2|1|0]
     ldr         r11, c00FF00FF
-    ldrb        r9, [r0, #-1]           ; top_left
+    ldrb        r9, [sp, #48]           ; top_left
     ldrb        r10, [r0, #4]           ; a[4]
 
     ldr         r0, c00020002
 
     uxtb16      r4, r8                  ; a[2|0]
     uxtb16      r5, r8, ror #8          ; a[3|1]
-    ldr         r2, [sp, #40]           ; stride
+    ldr         r2, [sp, #44]           ; dst_stride
     pkhbt       r9, r9, r5, lsl #16     ; a[1|-1]
 
     add         r9, r9, r4, lsl #1      ;[a[1]+2*a[2]       | tl+2*a[0]       ]
     uxtab16     r9, r9, r5              ;[a[1]+2*a[2]+a[3]  | tl+2*a[0]+a[1]  ]
+    ldr         r3, [sp, #40]           ; dst
     uxtab16     r9, r9, r0              ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]
 
     add         r0, r0, r10, lsl #16    ;[a[4]+2            |                 2]
@@ -154,25 +162,23 @@ b_ve_pred
 
     and         r9, r11, r9, asr #2
     and         r4, r11, r4, asr #2
-    add         r3, r3, r2              ; dst + dst_stride
     add         r9, r9, r4, lsl #8
 
     ; store values
-    str         r9, [r3, -r2]
+    str         r9, [r3], r2
+    str         r9, [r3], r2
+    str         r9, [r3], r2
     str         r9, [r3]
-    str         r9, [r3, r2]
-    str         r9, [r3, r2, lsl #1]
 
     pop        {r4-r12, pc}
 
 
 b_he_pred
-    sub         r10, r0, #1             ; Left
-    ldrb        r4, [r0, #-1]!          ; Left[0]
-    ldrb        r8, [r10, -r1]          ; top_left
-    ldrb        r5, [r10, r1]!          ; Left[1]
-    ldrb        r6, [r0, r1, lsl #1]    ; Left[2]
-    ldrb        r7, [r10, r1, lsl #1]   ; Left[3]
+    ldrb        r4, [r1], r2            ; Left[0]
+    ldrb        r8, [sp, #48]           ; top_left
+    ldrb        r5, [r1], r2            ; Left[1]
+    ldrb        r6, [r1], r2            ; Left[2]
+    ldrb        r7, [r1]                ; Left[3]
 
     add         r8, r8, r4              ; tl   + l[0]
     add         r9, r4, r5              ; l[0] + l[1]
@@ -197,7 +203,8 @@ b_he_pred
     pkhtb       r10, r10, r10, asr #16  ; l[-|2|-|2]
     pkhtb       r11, r11, r11, asr #16  ; l[-|3|-|3]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     add         r8, r8, r8, lsl #8      ; l[0|0|0|0]
     add         r9, r9, r9, lsl #8      ; l[1|1|1|1]
@@ -206,16 +213,16 @@ b_he_pred
 
     ; store values
     str         r8, [r3], r0
-    str         r9, [r3]
-    str         r10, [r3, r0]
-    str         r11, [r3, r0, lsl #1]
+    str         r9, [r3], r0
+    str         r10, [r3], r0
+    str         r11, [r3]
 
     pop        {r4-r12, pc}
 
 b_ld_pred
-    ldr         r4, [r0, -r1]!          ; Above
+    ldr         r4, [r0]                ; Above[0-3]
     ldr         r12, c00020002
-    ldr         r5, [r0, #4]
+    ldr         r5, [r0, #4]            ; Above[4-7]
     ldr         lr,  c00FF00FF
 
     uxtb16      r6, r4                  ; a[2|0]
@@ -225,7 +232,6 @@ b_ld_pred
     pkhtb       r10, r6, r8             ; a[2|4]
     pkhtb       r11, r7, r9             ; a[3|5]
 
-
     add         r4, r6, r7, lsl #1      ; [a2+2*a3      |      a0+2*a1]
     add         r4, r4, r10, ror #16    ; [a2+2*a3+a4   |   a0+2*a1+a2]
     uxtab16     r4, r4, r12             ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]
@@ -244,7 +250,8 @@ b_ld_pred
     add         r7, r7, r9, asr #16     ; [                 a5+2*a6+a7]
     uxtah       r7, r7, r12             ; [               a5+2*a6+a7+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r4, lr, r4, asr #2
@@ -266,18 +273,17 @@ b_ld_pred
     mov         r6, r6, lsr #16
     mov         r11, r10, lsr #8
     add         r11, r11, r6, lsl #24   ; [6|5|4|3]
-    str         r11, [r3], r0
+    str         r11, [r3]
 
     pop        {r4-r12, pc}
 
 b_rd_pred
-    sub         r12, r0, r1             ; Above = src - src_stride
-    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
-    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
-    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
-    ldrb        r6, [r12, r1, lsl #1]   ; l[1] = pp[2]
-    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
-    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+    ldrb        r7, [r1], r2            ; l[0] = pp[3]
+    ldr         lr, [r0]                ; Above = pp[8|7|6|5]
+    ldrb        r8, [sp, #48]           ; tl   = pp[4]
+    ldrb        r6, [r1], r2            ; l[1] = pp[2]
+    ldrb        r5, [r1], r2            ; l[2] = pp[1]
+    ldrb        r4, [r1], r2            ; l[3] = pp[0]
 
 
     uxtb16      r9, lr                  ; p[7|5]
@@ -307,7 +313,8 @@ b_rd_pred
     add         r7, r7, r10             ; [p6+2*p7+p8   |   p4+2*p5+p6]
     uxtab16     r7, r7, r12             ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r7, lr, r7, asr #2
@@ -328,18 +335,17 @@ b_rd_pred
 
     mov         r11, r10, lsl #8        ; [3|2|1|-]
     uxtab       r11, r11, r4            ; [3|2|1|0]
-    str         r11, [r3], r0
+    str         r11, [r3]
 
     pop        {r4-r12, pc}
 
 b_vr_pred
-    sub         r12, r0, r1             ; Above = src - src_stride
-    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
-    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
-    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
-    ldrb        r6, [r12, r1, lsl #1]   ; l[1] = pp[2]
-    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
-    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+    ldrb        r7, [r1], r2            ; l[0] = pp[3]
+    ldr         lr, [r0]                ; Above = pp[8|7|6|5]
+    ldrb        r8, [sp, #48]           ; tl   = pp[4]
+    ldrb        r6, [r1], r2            ; l[1] = pp[2]
+    ldrb        r5, [r1], r2            ; l[2] = pp[1]
+    ldrb        r4, [r1]                ; l[3] = pp[0]
 
     add         r5, r5, r7, lsl #16     ; p[3|1]
     add         r6, r6, r8, lsl #16     ; p[4|2]
@@ -376,7 +382,8 @@ b_vr_pred
     add         r8, r8, r10             ; [p6+2*p7+p8   |   p4+2*p5+p6]
     uxtab16     r8, r8, r12             ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r5, lr, r5, asr #2      ; [B|A]
@@ -397,14 +404,14 @@ b_vr_pred
     pkhtb       r10, r7, r5, asr #16    ; [-|H|-|B]
     str         r2, [r3], r0
     add         r12, r12, r10, lsl #8   ; [H|D|B|A]
-    str         r12, [r3], r0
+    str         r12, [r3]
 
     pop        {r4-r12, pc}
 
 b_vl_pred
-    ldr         r4, [r0, -r1]!          ; [3|2|1|0]
+    ldr         r4, [r0]                ; [3|2|1|0] = Above[0-3]
     ldr         r12, c00020002
-    ldr         r5, [r0, #4]            ; [7|6|5|4]
+    ldr         r5, [r0, #4]            ; [7|6|5|4] = Above[4-7]
     ldr         lr,  c00FF00FF
     ldr         r2,  c00010001
 
@@ -441,7 +448,8 @@ b_vl_pred
     add         r9, r9, r11             ; [p5+2*p6+p7   |   p3+2*p4+p5]
     uxtab16     r9, r9, r12             ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
 
-    ldr         r0, [sp, #40]           ; stride
+    ldr         r0, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     ; scale down
     and         r5, lr, r5, asr #2      ; [D|C]
@@ -449,7 +457,6 @@ b_vl_pred
     and         r8, lr, r8, asr #2      ; [I|D]
     and         r9, lr, r9, asr #2      ; [J|H]
 
-
     add         r10, r4, r6, lsl #8     ; [F|B|E|A]
     str         r10, [r3], r0
 
@@ -463,18 +470,17 @@ b_vl_pred
     str         r12, [r3], r0
 
     add         r10, r7, r10, lsl #8    ; [J|H|D|G]
-    str         r10, [r3], r0
+    str         r10, [r3]
 
     pop        {r4-r12, pc}
 
 b_hd_pred
-    sub         r12, r0, r1             ; Above = src - src_stride
-    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
-    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
-    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
-    ldrb        r6, [r0, r1]            ; l[1] = pp[2]
-    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
-    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+    ldrb        r7, [r1], r2            ; l[0] = pp[3]
+    ldr         lr, [r0]                ; Above = pp[8|7|6|5]
+    ldrb        r8, [sp, #48]           ; tl   = pp[4]
+    ldrb        r6, [r1], r2            ; l[1] = pp[2]
+    ldrb        r5, [r1], r2            ; l[2] = pp[1]
+    ldrb        r4, [r1]                ; l[3] = pp[0]
 
     uxtb16      r9, lr                  ; p[7|5]
     uxtb16      r10, lr, ror #8         ; p[8|6]
@@ -492,7 +498,6 @@ b_hd_pred
     pkhtb       r1, r9, r10             ; p[7|6]
     pkhbt       r10, r8, r10, lsl #16   ; p[6|5]
 
-
     uadd16      r11, r4, r5             ; [p1+p2        |        p0+p1]
     uhadd16     r11, r11, r2            ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
                                         ; [B|A]
@@ -518,7 +523,8 @@ b_hd_pred
     and         r5, lr, r5, asr #2      ; [H|G]
     and         r6, lr, r6, asr #2      ; [J|I]
 
-    ldr         lr, [sp, #40]           ; stride
+    ldr         lr, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
 
     pkhtb       r2, r0, r6              ; [-|F|-|I]
     pkhtb       r12, r6, r5, asr #16    ; [-|J|-|H]
@@ -527,7 +533,6 @@ b_hd_pred
     mov         r12, r12, ror #24       ; [J|I|H|F]
     str         r12, [r3], lr
 
-
     mov         r7, r11, asr #16        ; [-|-|-|B]
     str         r2, [r3], lr
     add         r7, r7, r0, lsl #16     ; [-|E|-|B]
@@ -536,21 +541,20 @@ b_hd_pred
     str         r7, [r3], lr
 
     add         r5, r11, r4, lsl #8     ; [D|B|C|A]
-    str         r5, [r3], lr
+    str         r5, [r3]
 
     pop        {r4-r12, pc}
 
 
 
 b_hu_pred
-    ldrb        r4, [r0, #-1]!          ; Left[0]
+    ldrb        r4, [r1], r2            ; Left[0]
     ldr         r12, c00020002
-    ldrb        r5, [r0, r1]!           ; Left[1]
+    ldrb        r5, [r1], r2            ; Left[1]
     ldr         lr,  c00FF00FF
-    ldrb        r6, [r0, r1]!           ; Left[2]
+    ldrb        r6, [r1], r2            ; Left[2]
     ldr         r2,  c00010001
-    ldrb        r7, [r0, r1]            ; Left[3]
-
+    ldrb        r7, [r1]                ; Left[3]
 
     add         r4, r4, r5, lsl #16     ; [1|0]
     add         r5, r5, r6, lsl #16     ; [2|1]
@@ -563,7 +567,8 @@ b_hu_pred
     add         r4, r4, r5, lsl #1      ; [p1+2*p2      |      p0+2*p1]
     add         r4, r4, r9              ; [p1+2*p2+p3   |   p0+2*p1+p2]
     uxtab16     r4, r4, r12             ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
-    ldr         r2, [sp, #40]           ; stride
+    ldr         r2, [sp, #44]           ; dst_stride
+    ldr         r3, [sp, #40]           ; dst
     and         r4, lr, r4, asr #2      ; [D|C]
 
     add         r10, r6, r7             ; [p2+p3]
@@ -587,9 +592,9 @@ b_hu_pred
 
     add         r10, r11, lsl #8        ; [-|-|F|E]
     add         r10, r10, r9, lsl #16   ; [G|G|F|E]
-    str         r10, [r3]
+    str         r10, [r3], r2
 
-    str         r7, [r3, r2]
+    str         r7, [r3]
 
     pop        {r4-r12, pc}
 
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index 05d0df4f4d71aaea3ee30590423378945f46c526..f0bdf29bea434c09c0ad409e2b93702efc713e58 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -147,7 +147,8 @@ prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigne
 specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
 
 prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
-# No existing specializations
+specialize vp8_intra4x4_predict media
+vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
 
 #
 # Postproc
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index bd971fab59a41299d47245b5610eac06d7dd17e5..a328f46c2b124f4210fa6aede92ccf1f0562da9e 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -148,7 +148,7 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/idct_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/loopfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/simpleloopfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/sixtappredict8x4_v6$(ASM)
-#VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/intra4x4_predict_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/intra4x4_predict_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/dequant_idct_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/dequantize_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/idct_blk_v6.c