Commit 3a2ad10d authored by James Zern, committed by Gerrit Code Review

Merge "Code clean of sad4xNx4D_sse"

parents 9e612763 789dbb31
@@ -700,16 +700,6 @@ const SadMxNParam mmx_tests[] = {
 INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
 #endif // HAVE_MMX

-#if HAVE_SSE
-#if CONFIG_USE_X86INC
-const SadMxNx4Param x4d_sse_tests[] = {
-  make_tuple(4, 8, &vpx_sad4x8x4d_sse, -1),
-  make_tuple(4, 4, &vpx_sad4x4x4d_sse, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
-#endif // CONFIG_USE_X86INC
-#endif // HAVE_SSE
-
 #if HAVE_SSE2
 #if CONFIG_USE_X86INC
 const SadMxNParam sse2_tests[] = {
@@ -828,6 +818,8 @@ const SadMxNx4Param x4d_sse2_tests[] = {
   make_tuple(8, 16, &vpx_sad8x16x4d_sse2, -1),
   make_tuple(8, 8, &vpx_sad8x8x4d_sse2, -1),
   make_tuple(8, 4, &vpx_sad8x4x4d_sse2, -1),
+  make_tuple(4, 8, &vpx_sad4x8x4d_sse2, -1),
+  make_tuple(4, 4, &vpx_sad4x4x4d_sse2, -1),
 #if CONFIG_VP9_HIGHBITDEPTH
   make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 8),
   make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 8),
......
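For reference, each SADx4Test tuple above pairs a block size with a vpx_sadWxHx4d function and checks it against a plain C implementation. A minimal sketch of that reference behaviour, assuming a made-up helper name (sad_wxh_x4d_ref is illustrative only, not the test's actual code):

#include <stdint.h>
#include <stdlib.h>

/* For each of the four candidate reference blocks, accumulate the sum of
 * absolute differences against the same WxH source block. */
static void sad_wxh_x4d_ref(int w, int h, const uint8_t *src, int src_stride,
                            const uint8_t *const ref[4], int ref_stride,
                            uint32_t sad[4]) {
  for (int i = 0; i < 4; ++i) {
    uint32_t sum = 0;
    for (int y = 0; y < h; ++y)
      for (int x = 0; x < w; ++x)
        sum += abs(src[y * src_stride + x] - ref[i][y * ref_stride + x]);
    sad[i] = sum;
  }
}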
@@ -1156,10 +1156,10 @@ add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";

 add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x8x4d msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";

 add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";

 #
 # Structured Similarity (SSIM)
......
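The two specialize lines above switch the 4x8 and 4x4 multi-reference SAD entries from the removed _sse implementations to the shared _sse2 ones; the prototype itself is unchanged. A hypothetical call site, using the signature from the add_proto lines (example() is illustrative, not libvpx code):

#include <stdint.h>

/* Prototype as declared by the add_proto line above. */
void vpx_sad4x8x4d_sse2(const uint8_t *src_ptr, int src_stride,
                        const uint8_t *const ref_ptr[], int ref_stride,
                        uint32_t *sad_array);

/* Rate one 4x8 source block against four motion-search candidates at once. */
static void example(const uint8_t *src, const uint8_t *const cand[4],
                    int stride) {
  uint32_t sad[4];
  vpx_sad4x8x4d_sse2(src, stride, cand, stride, sad);
  /* sad[i] now holds the SAD of src against cand[i]. */
}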
@@ -20,33 +20,41 @@ SECTION .text
   movd m4, [ref2q+%3]
   movd m7, [ref3q+%3]
   movd m5, [ref4q+%3]
-  punpckldq m0, [srcq +%4]
-  punpckldq m6, [ref1q+%5]
-  punpckldq m4, [ref2q+%5]
-  punpckldq m7, [ref3q+%5]
-  punpckldq m5, [ref4q+%5]
+  movd m1, [srcq +%4]
+  movd m2, [ref1q+%5]
+  punpckldq m0, m1
+  punpckldq m6, m2
+  movd m1, [ref2q+%5]
+  movd m2, [ref3q+%5]
+  movd m3, [ref4q+%5]
+  punpckldq m4, m1
+  punpckldq m7, m2
+  punpckldq m5, m3
+  movlhps m0, m0
+  movlhps m6, m4
+  movlhps m7, m5
   psadbw m6, m0
-  psadbw m4, m0
   psadbw m7, m0
-  psadbw m5, m0
-  punpckldq m6, m4
-  punpckldq m7, m5
 %else
   movd m1, [ref1q+%3]
+  movd m5, [ref1q+%5]
   movd m2, [ref2q+%3]
+  movd m4, [ref2q+%5]
+  punpckldq m1, m5
+  punpckldq m2, m4
   movd m3, [ref3q+%3]
+  movd m5, [ref3q+%5]
+  punpckldq m3, m5
   movd m4, [ref4q+%3]
-  punpckldq m0, [srcq +%4]
-  punpckldq m1, [ref1q+%5]
-  punpckldq m2, [ref2q+%5]
-  punpckldq m3, [ref3q+%5]
-  punpckldq m4, [ref4q+%5]
+  movd m5, [ref4q+%5]
+  punpckldq m4, m5
+  movd m5, [srcq +%4]
+  punpckldq m0, m5
+  movlhps m0, m0
+  movlhps m1, m2
+  movlhps m3, m4
   psadbw m1, m0
-  psadbw m2, m0
   psadbw m3, m0
-  psadbw m4, m0
-  punpckldq m1, m2
-  punpckldq m3, m4
   paddd m6, m1
   paddd m7, m3
 %endif
@@ -170,7 +178,7 @@ SECTION .text
 ; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride,
 ;                         uint8_t *ref[4], int ref_stride,
 ;                         uint32_t res[4]);
-; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
+; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
 %macro SADNXN4D 2
 %if UNIX64
 cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
@@ -192,7 +200,7 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
 %endrep
   PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
-%if mmsize == 16
+%if %1 > 4
   pslldq m5, 4
   pslldq m7, 4
   por m4, m5
@@ -207,6 +215,8 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
   RET
 %else
   movifnidn r4, r4mp
+  pshufd m6, m6, 0x08
+  pshufd m7, m7, 0x08
   movq [r4+0], m6
   movq [r4+8], m7
   RET
@@ -225,7 +235,5 @@ SADNXN4D 16, 8
 SADNXN4D 8, 16
 SADNXN4D 8, 8
 SADNXN4D 8, 4
-
-INIT_MMX sse
 SADNXN4D 4, 8
 SADNXN4D 4, 4
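The asm change above lets the 4x4 and 4x8 cases run on the SSE2/XMM path: two 4-pixel rows are packed into each 64-bit lane, the source rows are duplicated across both lanes, and a single psadbw then covers two reference blocks. A rough intrinsics rendering of that idea (illustrative only; load_4x2 and sad4x2_two_refs are names invented for this sketch, not libvpx functions):

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <string.h>

/* Load two 4-byte rows into the low 8 bytes of an XMM register
 * (the movd + punpckldq pairs in PROCESS_4x2x4). */
static __m128i load_4x2(const uint8_t *p, int stride) {
  int32_t a, b;
  memcpy(&a, p, 4);
  memcpy(&b, p + stride, 4);
  return _mm_unpacklo_epi32(_mm_cvtsi32_si128(a), _mm_cvtsi32_si128(b));
}

/* SAD of one 4x2 source block against two references with a single psadbw:
 * the source rows are duplicated into both 64-bit lanes (movlhps m0, m0),
 * the two references sit side by side (movlhps m6, m4), and _mm_sad_epu8
 * leaves one 4x2 SAD per lane. */
static void sad4x2_two_refs(const uint8_t *src, int src_stride,
                            const uint8_t *ref0, const uint8_t *ref1,
                            int ref_stride, uint32_t sad[2]) {
  __m128i s = _mm_unpacklo_epi64(load_4x2(src, src_stride),
                                 load_4x2(src, src_stride));
  __m128i r = _mm_unpacklo_epi64(load_4x2(ref0, ref_stride),
                                 load_4x2(ref1, ref_stride));
  __m128i d = _mm_sad_epu8(r, s);
  sad[0] = (uint32_t)_mm_cvtsi128_si32(d);
  sad[1] = (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(d, 8));
}

Keeping one result per 64-bit lane is also why the diff adds pshufd m6, m6, 0x08 and pshufd m7, m7, 0x08 before the final movq stores: the 0x08 selector moves each lane's SAD into the low two dwords so all four 32-bit results can be written out as two quadwords.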