diff --git a/configure b/configure
index 24be893f7ad731b595434fa0ec038ae0d5d3da2f..64f016547fcf537d646241503b434f2f9b7eec23 100755
--- a/configure
+++ b/configure
@@ -314,6 +314,7 @@ CMDLINE_SELECT="
     gprof
     gcov
     pic
+    use_x86inc
     optimizations
     ccache
     runtime_cpu_detect
diff --git a/test/sad_test.cc b/test/sad_test.cc
index bf3e0b81908a39f9e82aa0b847ede94b1913363b..b85da2f15495cbee89c08ae5c6db07aef662b84a 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -428,6 +428,7 @@ INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
 
 #if HAVE_SSE
 #if CONFIG_VP9_ENCODER
+#if CONFIG_USE_X86INC
 const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
 const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
 INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
@@ -441,6 +442,7 @@ INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
                         make_tuple(4, 4, sad_4x4x4d_sse)));
 #endif
 #endif
+#endif
 
 #if HAVE_SSE2
 #if CONFIG_VP8_ENCODER
@@ -451,6 +453,7 @@ const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
 const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
 #endif
 #if CONFIG_VP9_ENCODER
+#if CONFIG_USE_X86INC
 const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
 const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
 const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
@@ -463,6 +466,7 @@ const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
 const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
 const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
 #endif
+#endif
 const sad_m_by_n_test_param_t sse2_tests[] = {
 #if CONFIG_VP8_ENCODER
   make_tuple(16, 16, sad_16x16_wmt),
@@ -472,6 +476,7 @@ const sad_m_by_n_test_param_t sse2_tests[] = {
   make_tuple(4, 4, sad_4x4_wmt),
 #endif
 #if CONFIG_VP9_ENCODER
+#if CONFIG_USE_X86INC
   make_tuple(64, 64, sad_64x64_sse2_vp9),
   make_tuple(64, 32, sad_64x32_sse2_vp9),
   make_tuple(32, 64, sad_32x64_sse2_vp9),
@@ -484,6 +489,7 @@ const sad_m_by_n_test_param_t sse2_tests[] = {
   make_tuple(8, 8, sad_8x8_sse2_vp9),
   make_tuple(8, 4, sad_8x4_sse2_vp9),
 #endif
+#endif
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
 
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 798df0e2a871495c50a60468909f0bdff29c0ead..f61d26d0b146885f62cd5ccb901d9b00c1c146ec 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -480,82 +480,82 @@ prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, in
 specialize vp9_sub_pixel_avg_variance4x4 sse ssse3
 
 prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad64x64 sse2
+specialize vp9_sad64x64 $sse2_x86inc
 
 prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x64 sse2
+specialize vp9_sad32x64 $sse2_x86inc
 
 prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x32 sse2
+specialize vp9_sad64x32 $sse2_x86inc
 
 prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x16 sse2
+specialize vp9_sad32x16 $sse2_x86inc
 
 prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x32 sse2
+specialize vp9_sad16x32 $sse2_x86inc
 
 prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad32x32 sse2
+specialize vp9_sad32x32 $sse2_x86inc
 
 prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad16x16 mmx sse2
+specialize vp9_sad16x16 mmx $sse2_x86inc
 
 prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad16x8 mmx sse2
+specialize vp9_sad16x8 mmx $sse2_x86inc
 
 prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad8x16 mmx sse2
+specialize vp9_sad8x16 mmx $sse2_x86inc
 
 prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad8x8 mmx sse2
+specialize vp9_sad8x8 mmx $sse2_x86inc
 
 prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x4 sse2
+specialize vp9_sad8x4 $sse2_x86inc
 
 prototype unsigned int vp9_sad4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad4x8 sse
+specialize vp9_sad4x8 $sse_x86inc
 
 prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad4x4 mmx sse
+specialize vp9_sad4x4 mmx $sse_x86inc
 
 prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x64_avg sse2
+specialize vp9_sad64x64_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x64_avg sse2
+specialize vp9_sad32x64_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x32_avg sse2
+specialize vp9_sad64x32_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x16_avg sse2
+specialize vp9_sad32x16_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x32_avg sse2
+specialize vp9_sad16x32_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x32_avg sse2
+specialize vp9_sad32x32_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x16_avg sse2
+specialize vp9_sad16x16_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x8_avg sse2
+specialize vp9_sad16x8_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x16_avg sse2
+specialize vp9_sad8x16_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x8_avg sse2
+specialize vp9_sad8x8_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x4_avg sse2
+specialize vp9_sad8x4_avg $sse2_x86inc
 
 prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x8_avg sse
+specialize vp9_sad4x8_avg $sse_x86inc
 
 prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x4_avg sse
+specialize vp9_sad4x4_avg $sse_x86inc
 
 prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance_halfpixvar16x16_h sse2
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 572a28d8f0e83011d73bd471fd2e7a294f2faada..5a96fba9e57df53dbe4f7f6479cc5ef315f525a1 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -80,7 +80,6 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
@@ -89,6 +88,7 @@ VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
 
 ifeq ($(USE_X86INC),yes)
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
 endif