diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
index fe62fae134d4cbd7ee3c8b58719df81a80ccae0f..83921f8074dd8d69957f12edd2ee70b1399833cb 100644
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -105,6 +105,8 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
         rtcd->recon.recon       = vp8_recon_b_neon;
         rtcd->recon.recon2      = vp8_recon2b_neon;
         rtcd->recon.recon4      = vp8_recon4b_neon;
+        rtcd->recon.recon_mb    = vp8_recon_mb_neon;
+
     }
 #endif
 
diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/neon/recon_neon.c
similarity index 91%
rename from vp8/common/arm/recon_arm.c
rename to vp8/common/arm/neon/recon_neon.c
index a1b210130990ef9217e69202835207c5bf82ca27..3b2df4c9fb0ccb0f949724ad1c33f207c2db8bcc 100644
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/neon/recon_neon.c
@@ -15,8 +15,7 @@
 
 extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
 
-#if HAVE_ARMV7
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
     unsigned char *pred_ptr = &x->predictor[0];
     short *diff_ptr = &x->diff[0];
@@ -28,4 +27,3 @@ void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 
     vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
 }
-#endif
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index c30f6dc2dcf9a69612d8e0993ee8839477a0843d..bf32e6205d599fddae62f68a3c95744c54b10107 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -51,6 +51,8 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
 extern prototype_copy_block(vp8_copy_mem8x4_neon);
 extern prototype_copy_block(vp8_copy_mem16x16_neon);
 
+extern prototype_recon_macroblock(vp8_recon_mb_neon);
+
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_neon
@@ -69,6 +71,9 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
 
 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
+
+#undef  vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_neon
 #endif
 #endif
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 0ef375e334fa2b72a4f0a3638ab593d44bd2b3b7..0c9b77e76275916a5a2849fc2986f9ee848f3a82 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -40,9 +40,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
     rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
     rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
+    rtcd->recon.recon       = vp8_recon_b_c;
     rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
+    rtcd->recon.recon4      = vp8_recon4b_c;
+    rtcd->recon.recon_mb    = vp8_recon_mb_c;
+    rtcd->recon.recon_mby   = vp8_recon_mby_c;
 
     rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
     rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index f31ffd8e4dfbcfeb22cd36ffab043f2002ff8167..2cb3bc6ce14a707b875f63304ed93bd6854c07d8 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -106,7 +106,7 @@ void vp8_recon2b_c
     }
 }
 
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
 #if ARCH_ARM
     BLOCKD *b = &x->block[0];
@@ -135,7 +135,7 @@ void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 #endif
 }
 
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
 #if ARCH_ARM
     BLOCKD *b = &x->block[0];
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index e34a63c86d561c46359651601fa1edc398fbe0f3..c432c7bd27338a1d16f59d10cb6138fb5816d7f2 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -12,12 +12,19 @@
 #ifndef __INC_RECON_H
 #define __INC_RECON_H
 
+#include "blockd.h"
+
 #define prototype_copy_block(sym) \
     void sym(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch)
 
 #define prototype_recon_block(sym) \
     void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch);
 
+#define prototype_recon_macroblock(sym) \
+    void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x)
+
+struct vp8_recon_rtcd_vtable;
+
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/recon_x86.h"
 #endif
@@ -56,9 +63,20 @@ extern prototype_recon_block(vp8_recon_recon2);
 #endif
 extern prototype_recon_block(vp8_recon_recon4);
 
+#ifndef vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mb);
+
+#ifndef vp8_recon_recon_mby
+#define vp8_recon_recon_mby vp8_recon_mby_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mby);
+
 typedef prototype_copy_block((*vp8_copy_block_fn_t));
 typedef prototype_recon_block((*vp8_recon_fn_t));
-typedef struct
+typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
+typedef struct vp8_recon_rtcd_vtable
 {
     vp8_copy_block_fn_t  copy16x16;
     vp8_copy_block_fn_t  copy8x8;
@@ -66,6 +84,8 @@ typedef struct
     vp8_recon_fn_t       recon;
     vp8_recon_fn_t       recon2;
     vp8_recon_fn_t       recon4;
+    vp8_recon_mb_fn_t    recon_mb;
+    vp8_recon_mb_fn_t    recon_mby;
 } vp8_recon_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -74,9 +94,6 @@ typedef struct
 #define RECON_INVOKE(ctx,fn) vp8_recon_##fn
 #endif
 
-#include "blockd.h"
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 #endif
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index a790456f89a1ecdef13a23325e651ed8528115c2..1c72b90f128c36d5ab810d5788dbe3acc2f7d84b 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -113,7 +113,8 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mby)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 
     // make sure block modes are set the way we want them for context updates
     for (b = 0; b < 16; b++)
@@ -157,7 +158,8 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mby)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 
     // make sure block modes are set the way we want them for context updates
     for (b = 0; b < 16; b++)
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 99ee2b8067fc26cd3de1a0396c2087e11f85dda0..043eac219797cbd2779b4ee4231caa40fc877647 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -639,7 +639,8 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mb(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mb)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 }
 
 
@@ -656,7 +657,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mby)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 }
 
 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 24cbbdd0c671ed16849bc235d4e5b8a7b20f5a77..bb3f8259c77c16444aff9cb981facb7d4637c0e8 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -118,7 +118,6 @@ VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/arm_systemdependent.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/filter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/loopfilter_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/recon_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 
@@ -162,6 +161,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/sixtappredict16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon16x16mb_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/save_neon_reg$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon_neon.c
 
 
 #