diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm index f106bc78e24df8de3209591378e2d0cabec4cec9..98619bb30582f9042844cfa611bc4b3c2c923641 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon.asm +++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm @@ -16,6 +16,7 @@ EXPORT |vp9_h_predictor_8x8_neon| EXPORT |vp9_h_predictor_16x16_neon| EXPORT |vp9_h_predictor_32x32_neon| + EXPORT |vp9_tm_predictor_4x4_neon| ARM REQUIRE8 PRESERVE8 @@ -283,4 +284,52 @@ loop_h bx lr ENDP ; |vp9_h_predictor_32x32_neon| +;void vp9_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vp9_tm_predictor_4x4_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + ldrb r12, [r12] + vdup.u8 d0, r12 + + ; Load above 4 pixels + vld1.32 {d2[0]}, [r2] + + ; Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + ; Load left row by row and compute left + (above - ytop_left) + ; 1st row and 2nd row + ldrb r12, [r3], #1 + ldrb r2, [r3], #1 + vdup.u16 q1, r12 + vdup.u16 q2, r2 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqshrun.s16 d0, q1, #0 + vqshrun.s16 d1, q2, #0 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + + ; 3rd row and 4th row + ldrb r12, [r3], #1 + ldrb r2, [r3], #1 + vdup.u16 q1, r12 + vdup.u16 q2, r2 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqshrun.s16 d0, q1, #0 + vqshrun.s16 d1, q2, #0 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + + bx lr + ENDP ; |vp9_tm_predictor_4x4_neon| + END diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index cc571072fcf67ae8871da54a5dcd525ae9994049..a32f98aa50ee6182e8af92484702f11d6d1ad933 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -57,7 +57,7 @@ prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint specialize vp9_v_predictor_4x4 $sse_x86inc neon prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_4x4 $sse_x86inc dspr2 +specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2 prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2