diff --git a/configure b/configure
index 3651334e2874214bbf1c02c16190d357fd4d6e32..c45f908d9b02907a588cba7ee4f0e240a1a38ab4 100755
--- a/configure
+++ b/configure
@@ -257,6 +257,7 @@ CONFIG_LIST="
     install_bins
     install_libs
     install_srcs
+    force_x86inc
     debug
     gprof
     gcov
diff --git a/libs.mk b/libs.mk
index 4aa7dc48a0551b457931a9cb3593c8d17fe3e2c4..ac2e73a43ab310bb970b8daea26f47ab86879a2a 100644
--- a/libs.mk
+++ b/libs.mk
@@ -57,6 +57,19 @@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h
 RTCD += $$(BUILD_PFX)$(1).h
 endef
 
+# x86inc.asm is not compatible with pic 32bit builds. Restrict files which use
+# it to 64bit builds or builds without pic, unless CONFIG_FORCE_X86INC is yes.
+USE_X86INC = no
+ifneq ($(CONFIG_PIC),yes)
+  USE_X86INC = yes
+endif
+ifeq ($(ARCH_X86_64),yes)
+  USE_X86INC = yes
+endif
+ifeq ($(CONFIG_FORCE_X86INC),yes)
+  USE_X86INC = yes
+endif
+
 CODEC_SRCS-yes += CHANGELOG
 CODEC_SRCS-yes += libs.mk
 
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 3b72129cc261071e245308c4fb32cd7ffc460187..b1510c64825eafe8f423152294f48a9cf6963509 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -527,9 +527,9 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
 
 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
-    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c,
-    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c,
-    vp9_convolve8_ssse3, vp9_convolve8_avg_c);
+    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
+    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
+    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3);
 
 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_ssse3),
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index f86e2ffbea50b7719d7bc1518b77b11a1061b9ee..ec420735a91e333f4cdabea5829ab3a07cacf444 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -20,7 +20,11 @@ EOF
 }
 forward_decls vp9_common_forward_decls
 
-[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
+# x86inc.asm doesn't work if pic is enabled on 32 bit platforms, so x86inc-based assembly is only enabled on x86_64, without pic, or when CONFIG_FORCE_X86INC is yes.
+{ [ "$arch" = "x86_64" ] || [ "$CONFIG_PIC" != "yes" ] || [ "$CONFIG_FORCE_X86INC" = "yes" ]; } && mmx_x86inc=mmx && sse2_x86inc=sse2 && ssse3_x86inc=ssse3
+
+# this variable is for functions that are 64 bit only.
+[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3
 
 #
 # Dequant
@@ -263,10 +267,10 @@ specialize vp9_blend_b
 # Sub Pixel Filters
 #
 prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_copy sse2
+specialize vp9_convolve_copy $sse2_x86inc
 
 prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_avg sse2
+specialize vp9_convolve_avg $sse2_x86inc
 
 prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
 specialize vp9_convolve8 ssse3 neon
@@ -702,8 +706,6 @@ specialize vp9_block_error sse2
 prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
 specialize vp9_subtract_block sse2
 
-[ $arch = "x86_64" ] && ssse3_x86_64=ssse3
-
 prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
 specialize vp9_quantize_b $ssse3_x86_64
 
@@ -714,13 +716,11 @@ specialize vp9_quantize_b_32x32 $ssse3_x86_64
 # Structured Similarity (SSIM)
 #
 if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
-    [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
-
     prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
-    specialize vp9_ssim_parms_8x8 $sse2_on_x86_64
+    specialize vp9_ssim_parms_8x8 $sse2_x86_64
 
     prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
-    specialize vp9_ssim_parms_16x16 $sse2_on_x86_64
+    specialize vp9_ssim_parms_16x16 $sse2_x86_64
 fi
 
 # fdct functions
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 39c52fe8a6d1edf60030b5a63db0226d7f4fe5a3..b2b2a80a7266892f6ee16645eab61517972da710 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -74,7 +74,6 @@ VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_ss
 VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h
 VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c
 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
@@ -83,6 +82,10 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
 endif
 
+ifeq ($(USE_X86INC),yes)
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
+endif
+
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
 
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c