Commit 69a5f5ec authored by levytamar82's avatar levytamar82 Committed by James Zern

Fix bug 807

in the sub_pixel_*variance* function the dst is aligned to 16 bytes and not
to 32 bytes - now load unaligned data

Change-Id: I2e0b9745543697efc56fefa32857ea10117af135
parent 72a5832a
...@@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P( ...@@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P(
#endif #endif
#if HAVE_AVX2 #if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
unsigned int vp9_sub_pixel_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
}
const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2; const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2; const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2; const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
...@@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 = ...@@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 = const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
vp9_sub_pixel_variance64x64_avx2; vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelVarianceTest, AVX2, VP9SubpelVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2), ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
make_tuple(6, 6, subpel_variance64x64_avx2))); make_tuple(6, 6, subpel_variance64x64_avx2)));
...@@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 = ...@@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 = const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
vp9_sub_pixel_avg_variance64x64_avx2; vp9_sub_pixel_avg_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelAvgVarianceTest, AVX2, VP9SubpelAvgVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2), ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
make_tuple(6, 6, subpel_avg_variance64x64_avx2))); make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
#endif // HAVE_AVX2 #endif // HAVE_AVX2
......
...@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_ ...@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
...@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ ...@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc"; specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc"; specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc";
......
...@@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { ...@@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
#define LOAD_SRC_DST \ #define LOAD_SRC_DST \
/* load source and destination */ \ /* load source and destination */ \
src_reg = _mm256_loadu_si256((__m256i const *) (src)); \ src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
dst_reg = _mm256_load_si256((__m256i const *) (dst)); dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
#define AVG_NEXT_SRC(src_reg, size_stride) \ #define AVG_NEXT_SRC(src_reg, size_stride) \
src_next_reg = _mm256_loadu_si256((__m256i const *) \ src_next_reg = _mm256_loadu_si256((__m256i const *) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment