Commit 69a5f5ec authored by levytamar82's avatar levytamar82 Committed by James Zern

Fix bug 807

in the sub_pixel_*variance* function the dst is aligned to 16 bytes and not
to 32 bytes - now load unaligned data

Change-Id: I2e0b9745543697efc56fefa32857ea10117af135
parent 72a5832a
......@@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P(
#endif
#if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
unsigned int vp9_sub_pixel_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
}
const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
......@@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelVarianceTest,
AVX2, VP9SubpelVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
make_tuple(6, 6, subpel_variance64x64_avx2)));
......@@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
vp9_sub_pixel_avg_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelAvgVarianceTest,
AVX2, VP9SubpelAvgVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
#endif // HAVE_AVX2
......
......@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
......@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc";
......
......@@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
#define LOAD_SRC_DST \
/* load source and destination */ \
src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
dst_reg = _mm256_load_si256((__m256i const *) (dst));
dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
#define AVG_NEXT_SRC(src_reg, size_stride) \
src_next_reg = _mm256_loadu_si256((__m256i const *) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment