diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 3412ddd23604b6019d3fa1f0fca9dcb25b21acd1..6af2abb79f66d94498634035e1d1a66a14a9ae11 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -645,6 +645,26 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( #endif #if HAVE_AVX2 +// TODO(jzern): these prototypes can be removed after the avx2 versions are +// reenabled in vp9_rtcd_defs.pl. +extern "C" { +void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); +void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); +void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); +} + const ConvolveFunctions convolve8_avx2( vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, @@ -655,8 +675,10 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( make_tuple(8, 4, &convolve8_avx2), make_tuple(4, 8, &convolve8_avx2), make_tuple(8, 8, &convolve8_avx2), + make_tuple(8, 16, &convolve8_avx2))); + +INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values( make_tuple(16, 8, &convolve8_avx2), - make_tuple(8, 16, &convolve8_avx2), make_tuple(16, 16, &convolve8_avx2), make_tuple(32, 16, &convolve8_avx2), make_tuple(16, 32, &convolve8_avx2), diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index 99c8d0c7cc4bfc4cc7598efb86bcb8cba96d8bf8..e6a20fb4144283aa3f381dab941692567d6a7497 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -606,4 +606,29 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0))); #endif + +#if HAVE_AVX2 +// TODO(jzern): these prototypes can be removed after the avx2 versions are +// reenabled in vp9_rtcd_defs.pl. +extern "C" { +void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride); +void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, int stride, + int tx_type); +} +INSTANTIATE_TEST_CASE_P( + DISABLED_AVX2, Trans16x16DCT, + ::testing::Values( + make_tuple(&vp9_fdct16x16_avx2, + &vp9_idct16x16_256_add_c, 0))); +INSTANTIATE_TEST_CASE_P( + AVX2, Trans16x16HT, + ::testing::Values( + make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 3))); +INSTANTIATE_TEST_CASE_P( + DISABLED_AVX2, Trans16x16HT, + ::testing::Values( + make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 0), + make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 1), + make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 2))); +#endif } // namespace diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 961eb4d6c22828f972137138c56138c59c1420ac..ec233d3f3658adcbb01a5c05e2c80765c53fe9fa 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -376,4 +376,19 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3))); #endif +#if HAVE_AVX2 +INSTANTIATE_TEST_CASE_P( + AVX2, Trans4x4DCT, + ::testing::Values( + make_tuple(&vp9_fdct4x4_avx2, + &vp9_idct4x4_16_add_c, 0))); +INSTANTIATE_TEST_CASE_P( + AVX2, Trans4x4HT, + ::testing::Values( + make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 0), + make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 1), + make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 2), + make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 3))); +#endif + } // namespace diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index 72a5fad352f0ffb72830026d2a00d4082e62c459..146aa31c688a8c9777d311d240b8233638a57b36 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -367,4 +367,18 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0))); #endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_CASE_P( + AVX2, FwdTrans8x8DCT, + ::testing::Values( + make_tuple(&vp9_fdct8x8_avx2, &vp9_idct8x8_64_add_c, 0))); +INSTANTIATE_TEST_CASE_P( + AVX2, FwdTrans8x8HT, + ::testing::Values( + make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 0), + make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 1), + make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 2), + make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 3))); +#endif } // namespace diff --git a/test/sad_test.cc b/test/sad_test.cc index f9ffa92dec35346fda271aeffea51c6d3d811e73..89d8c415206f05a8c85eba6c6ea8d5349741579e 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -627,4 +627,24 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values( #endif // CONFIG_USE_X86INC #endif // HAVE_SSSE3 +#if HAVE_AVX2 +#if CONFIG_VP9_ENCODER +// TODO(jzern): these prototypes can be removed after the avx2 versions are +// reenabled in vp9_rtcd_defs.pl. +extern "C" { +void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_ptr[], int ref_stride, + unsigned int *sad_array); +void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_ptr[], int ref_stride, + unsigned int *sad_array); +} +const sad_n_by_n_by_4_fn_t sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2; +const sad_n_by_n_by_4_fn_t sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2; +INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values( + make_tuple(32, 32, sad_32x32x4d_avx2), + make_tuple(64, 64, sad_64x64x4d_avx2))); +#endif // CONFIG_VP9_ENCODER +#endif // HAVE_AVX2 + } // namespace diff --git a/test/variance_test.cc b/test/variance_test.cc index c9bf13a6bd4dbf57007b29c292b46a60a9c5286b..9985695163adf2a09f6eba763673f3bc394728e1 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -702,6 +702,57 @@ INSTANTIATE_TEST_CASE_P( make_tuple(6, 6, subpel_avg_variance64x64_ssse3))); #endif #endif + +#if HAVE_AVX2 +// TODO(jzern): these prototypes can be removed after the avx2 versions are +// reenabled in vp9_rtcd_defs.pl. +extern "C" { +unsigned int vp9_sub_pixel_variance32x32_avx2( + const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_variance64x64_avx2( + const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp9_sub_pixel_avg_variance32x32_avx2( + const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, + const uint8_t *second_pred); +unsigned int vp9_sub_pixel_avg_variance64x64_avx2( + const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, + const uint8_t *second_pred); +} +const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2; +const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2; +const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2; +const vp9_variance_fn_t variance64x32_avx2 = vp9_variance64x32_avx2; +const vp9_variance_fn_t variance64x64_avx2 = vp9_variance64x64_avx2; +INSTANTIATE_TEST_CASE_P( + AVX2, VP9VarianceTest, + ::testing::Values(make_tuple(4, 4, variance16x16_avx2), + make_tuple(5, 4, variance32x16_avx2), + make_tuple(5, 5, variance32x32_avx2), + make_tuple(6, 5, variance64x32_avx2), + make_tuple(6, 6, variance64x64_avx2))); + +const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 = + vp9_sub_pixel_variance32x32_avx2; +const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 = + vp9_sub_pixel_variance64x64_avx2; +INSTANTIATE_TEST_CASE_P( + DISABLED_AVX2, VP9SubpelVarianceTest, + ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2), + make_tuple(6, 6, subpel_variance64x64_avx2))); + +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 = + vp9_sub_pixel_avg_variance32x32_avx2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 = + vp9_sub_pixel_avg_variance64x64_avx2; +INSTANTIATE_TEST_CASE_P( + DISABLED_AVX2, VP9SubpelAvgVarianceTest, + ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2), + make_tuple(6, 6, subpel_avg_variance64x64_avx2))); +#endif // HAVE_AVX2 #endif // CONFIG_VP9_ENCODER } // namespace vp9 diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 09ce72ef2c9575cd5b4b858c05a6fc53fae7490d..06ed4707984575334d5357e457474438f201b242 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc"; $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon; add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/; +specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/; $vp9_convolve8_neon_asm=vp9_convolve8_neon; add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/; +specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/; $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon; add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/; +specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/; $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon; add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; @@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_ specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const specialize qw/vp9_sad4x4x8 sse4/; add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad64x64x4d sse2 avx2/; +specialize qw/vp9_sad64x64x4d sse2/; add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad32x64x4d sse2/; @@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co specialize qw/vp9_sad16x32x4d sse2/; add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad32x32x4d sse2 avx2/; +specialize qw/vp9_sad32x32x4d sse2/; add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad16x16x4d sse2/; @@ -739,7 +739,7 @@ add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int strid specialize qw/vp9_fht8x8 sse2 avx2/; add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type"; -specialize qw/vp9_fht16x16 sse2 avx2/; +specialize qw/vp9_fht16x16 sse2/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; @@ -751,7 +751,7 @@ add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stri specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64"; add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct16x16 sse2 avx2/; +specialize qw/vp9_fdct16x16 sse2/; add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride"; specialize qw/vp9_fdct32x32 sse2 avx2/;