Commit 8366b414 authored by Jian Zhou

Code cleanup of highbd_dc_predictor_4x4

MMX replaced with SSE2, same performance.

Change-Id: Ic57855254e26757191933c948fac6aa047fadafc
parent f075fdc4
...@@ -141,7 +141,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, ...@@ -141,7 +141,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_tm_predictor_16x16_c, 16, 8), &vpx_highbd_tm_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2, make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, 8), &vpx_highbd_tm_predictor_32x32_c, 32, 8),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse, make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8), &vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8), &vpx_highbd_dc_predictor_8x8_c, 8, 8),
...@@ -162,7 +162,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, ...@@ -162,7 +162,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
#else #else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values( ::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_4x4_sse, make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8), &vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8), &vpx_highbd_dc_predictor_8x8_c, 8, 8),
...@@ -194,7 +194,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest, ...@@ -194,7 +194,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2, make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, &vpx_highbd_tm_predictor_32x32_c, 32,
10), 10),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse, make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10), &vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10), &vpx_highbd_dc_predictor_8x8_c, 8, 10),
...@@ -218,7 +218,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest, ...@@ -218,7 +218,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
#else #else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest, INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values( ::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_4x4_sse, make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10), &vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10), &vpx_highbd_dc_predictor_8x8_c, 8, 10),
...@@ -251,7 +251,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, ...@@ -251,7 +251,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2, make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, &vpx_highbd_tm_predictor_32x32_c, 32,
12), 12),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse, make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12), &vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12), &vpx_highbd_dc_predictor_8x8_c, 8, 12),
...@@ -275,7 +275,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, ...@@ -275,7 +275,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
#else #else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values( ::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_4x4_sse, make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12), &vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12), &vpx_highbd_dc_predictor_8x8_c, 8, 12),
......
...@@ -294,7 +294,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { ...@@ -294,7 +294,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc"; specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc"; specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_4x4/; specialize qw/vpx_highbd_dc_top_predictor_4x4/;
......
...@@ -17,24 +17,20 @@ pw_16: times 4 dd 16 ...@@ -17,24 +17,20 @@ pw_16: times 4 dd 16
pw_32: times 4 dd 32 pw_32: times 4 dd 32
SECTION .text SECTION .text
INIT_MMX sse INIT_XMM sse2
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
GET_GOT goffsetq GET_GOT goffsetq
movq m0, [aboveq] movq m0, [aboveq]
movq m2, [leftq] movq m2, [leftq]
DEFINE_ARGS dst, stride, one
mov oned, 0x0001
pxor m1, m1
movd m3, oned
pshufw m3, m3, 0x0
paddw m0, m2 paddw m0, m2
pmaddwd m0, m3 pshuflw m1, m0, 0xe
packssdw m0, m1 paddw m0, m1
pmaddwd m0, m3 pshuflw m1, m0, 0x1
paddw m0, m1
paddw m0, [GLOBAL(pw_4)] paddw m0, [GLOBAL(pw_4)]
psraw m0, 3 psraw m0, 3
pshufw m0, m0, 0x0 pshuflw m0, m0, 0x0
movq [dstq ], m0 movq [dstq ], m0
movq [dstq+strideq*2], m0 movq [dstq+strideq*2], m0
lea dstq, [dstq+strideq*4] lea dstq, [dstq+strideq*4]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment