Commit efa82922 authored by James Zern's avatar James Zern Committed by Gerrit Code Review

Merge "Fix bug 806"

parents b418c958 af10457e
......@@ -640,19 +640,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
#if HAVE_AVX2
#if CONFIG_VP9_ENCODER
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
unsigned int *sad_array);
void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
unsigned int *sad_array);
}
const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
make_tuple(32, 32, sad_32x32x4d_avx2),
make_tuple(64, 64, sad_64x64x4d_avx2)));
#endif // CONFIG_VP9_ENCODER
......
......@@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vp9_sad4x4x8 sse4/;
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad64x64x4d sse2/;
specialize qw/vp9_sad64x64x4d sse2 avx2/;
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;
......@@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
specialize qw/vp9_sad16x32x4d sse2/;
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x32x4d sse2/;
specialize qw/vp9_sad32x32x4d sse2 avx2/;
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;
......
......@@ -31,7 +31,7 @@ void vp9_sad32x32x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 32 ; i++) {
// load src and all refs
src_reg = _mm256_load_si256((__m256i *)(src));
src_reg = _mm256_loadu_si256((__m256i *)(src));
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
......@@ -103,8 +103,8 @@ void vp9_sad64x64x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 64 ; i++) {
// load 64 bytes from src and all refs
src_reg = _mm256_load_si256((__m256i *)(src));
srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
src_reg = _mm256_loadu_si256((__m256i *)(src));
srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32));
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment