diff --git a/test/variance_test.cc b/test/variance_test.cc index 40b7df630e71c2a24bc8c54d7199d17d919b65ca..7d8118235d9098db589efedcb9110834f59f7e8b 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P( #endif #if HAVE_AVX2 -// TODO(jzern): these prototypes can be removed after the avx2 versions are -// reenabled in vp9_rtcd_defs.pl. -extern "C" { -unsigned int vp9_sub_pixel_variance32x32_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_sub_pixel_variance64x64_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_sub_pixel_avg_variance32x32_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, - const uint8_t *second_pred); -unsigned int vp9_sub_pixel_avg_variance64x64_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, - const uint8_t *second_pred); -} + const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2; const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2; const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2; @@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 = const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 = vp9_sub_pixel_variance64x64_avx2; INSTANTIATE_TEST_CASE_P( - DISABLED_AVX2, VP9SubpelVarianceTest, + AVX2, VP9SubpelVarianceTest, ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2), make_tuple(6, 6, subpel_variance64x64_avx2))); @@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 = const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 = vp9_sub_pixel_avg_variance64x64_avx2; INSTANTIATE_TEST_CASE_P( - DISABLED_AVX2, VP9SubpelAvgVarianceTest, + AVX2, VP9SubpelAvgVarianceTest, ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2), make_tuple(6, 6, subpel_avg_variance64x64_avx2))); #endif // HAVE_AVX2 diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 36bdc8e14c2f57889b589da28caa2edbe503d635..708f41b872fa31660e54ff99f7dc35b92c25ca89 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_ specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc"; diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c index 34ed1867f6102d0084e774a251b364cd4f40511c..9aa4da9620aa9147e782b7fd73eeaf4415d79514 100644 --- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c +++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c @@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { #define LOAD_SRC_DST \ /* load source and destination */ \ src_reg = _mm256_loadu_si256((__m256i const *) (src)); \ - dst_reg = _mm256_load_si256((__m256i const *) (dst)); + dst_reg = _mm256_loadu_si256((__m256i const *) (dst)); #define AVG_NEXT_SRC(src_reg, size_stride) \ src_next_reg = _mm256_loadu_si256((__m256i const *) \