Commit f4621c5c authored by Jian Zhou

Speed up tm_predictor_8x8

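The left-neighbor column is now read from memory only once, before the loop,
instead of being reloaded on every loop iteration.
This speeds up tm_predictor_8x8 by ~20% as measured by ./test_intra_pred_speed.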

Change-Id: Ia1388630df6fed0dce9a6eeded6cb855bbc43505
parent f3f6b6fe
...@@ -545,33 +545,31 @@ cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left ...@@ -545,33 +545,31 @@ cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left
RET RET
;------------------------------------------------------------------------------
; tm_predictor_8x8(dst, stride, above, left)
;
; Fills an 8x8 block of bytes at dst (rows strideq bytes apart) with
;   dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
; for r, c in 0..7.
;
; Register roles:
;   m0 = above[0..7] - above[-1], as eight 16-bit words (loop invariant)
;   m1 = zero, used to widen bytes to words
;   m2 = top-left broadcast during setup; afterwards the not-yet-consumed
;        left[] values as words, next row's value in word 0
;   m3 = odd row of the current pair, m4 = even row of the current pair
;   lineq = loop counter, runs -4..-1 (4 iterations, 2 rows per iteration)
;
; The left column is loaded from memory once, before the loop, and consumed
; two words per iteration by shifting m2.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left
  pxor                  m1, m1                  ; m1 = 0
  movd                  m2, [aboveq-1]          ; byte 0 = top-left pixel above[-1]
  movq                  m0, [aboveq]            ; eight pixels of the row above
  punpcklbw             m2, m1                  ; widen to words; word 0 = tl
  punpcklbw             m0, m1                  ; t1 t2 t3 t4 t5 t6 t7 t8 [word]
  pshuflw               m2, m2, 0x0             ; [63:0] tl tl tl tl [word]
  ; above is not referenced past this point; its register is renamed to line.
  DEFINE_ARGS dst, stride, line, left
  mov                lineq, -4                  ; counts up to 0: 4 row pairs
  punpcklqdq            m2, m2                  ; tl tl tl tl tl tl tl tl [word]
  psubw                 m0, m2                  ; t1-tl t2-tl ... t8-tl [word]
  movq                  m2, [leftq]             ; single load of all eight left pixels
  punpcklbw             m2, m1                  ; l1 l2 l3 l4 l5 l6 l7 l8 [word]
.loop:
  pshuflw               m4, m2, 0x0             ; [63:0] l1 l1 l1 l1 [word]
  pshuflw               m3, m2, 0x55            ; [63:0] l2 l2 l2 l2 [word]
  punpcklqdq            m4, m4                  ; l1 l1 l1 l1 l1 l1 l1 l1 [word]
  punpcklqdq            m3, m3                  ; l2 l2 l2 l2 l2 l2 l2 l2 [word]
  paddw                 m4, m0                  ; even row: left + (above - tl)
  paddw                 m3, m0                  ; odd row:  left + (above - tl)
  packuswb              m4, m3                  ; saturate to bytes: low 8 = even row, high 8 = odd row
  movq      [dstq        ], m4                  ; store even row
  movhps    [dstq+strideq], m4                  ; store odd row
  lea                 dstq, [dstq+strideq*2]    ; advance two rows
  psrldq                m2, 4                   ; drop the two consumed left words
  inc                lineq
  jnz .loop
  REP_RET
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment