From f2ef3892564e8a2fd76a9ae1b9d19058572f713b Mon Sep 17 00:00:00 2001
From: hkuang <hkuang@google.com>
Date: Thu, 9 Jan 2014 18:40:19 -0800
Subject: [PATCH] Add vp9_tm_predictor_4x4 neon implementation

Change-Id: I10c423bde7ea5a3bac9f14f35c73b6bc31c8f3e3
---
# Reviewer notes (free-form area after '---'; not part of the commit).
#
# What the patch does:
#   * Exports and defines |vp9_tm_predictor_4x4_neon| in
#     vp9_reconintra_neon.asm.
#   * Adds `neon` to the `specialize vp9_tm_predictor_4x4` list in
#     vp9_rtcd_defs.sh.
#
# How the new routine works (r0 = dst, r1 = y_stride, r2 = above, r3 = left):
#   1. Reads the byte at above[-1] through r12 and duplicates it into every
#      8-bit lane of d0.
#   2. Loads 4 bytes from `above` into lane 0 of d2; the other lane of d2 is
#      not loaded, and only the low 4 result bytes are ever stored.
#   3. vsubl.u8 produces the 16-bit differences (above - top_left) in q3.
#   4. For each of the 4 rows: loads one byte from `left` (post-incrementing
#      r3), duplicates it into 16-bit lanes, adds q3, narrows back to bytes
#      with unsigned saturation, and stores 4 bytes to dst, then advances dst
#      by y_stride.
#   r2 is reused as a scratch register once `above` has been loaded; r12 is
#   the other scratch register.
#
# NOTE(review): `vqshrun.s16 dN, qM, #0` uses a zero shift count, so it is
#   presumably assembled as a plain saturating narrow (vqmovun.s16) -- confirm
#   every assembler this file is built with accepts the #0 form.
# NOTE(review): line breaks and column alignment in this copy were
#   reconstructed from a whitespace-collapsed source. The number of '+' lines
#   in the assembly hunks could not be reconciled with the diffstat and the
#   '@@ -283,4 +284,52 @@' header; re-check against the original commit
#   before applying.
#
 vp9/common/arm/neon/vp9_reconintra_neon.asm | 49 +++++++++++++++++++++
 vp9/common/vp9_rtcd_defs.sh | 2 +-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index f106bc78e2..98619bb305 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -16,6 +16,7 @@
     EXPORT  |vp9_h_predictor_8x8_neon|
     EXPORT  |vp9_h_predictor_16x16_neon|
     EXPORT  |vp9_h_predictor_32x32_neon|
+    EXPORT  |vp9_tm_predictor_4x4_neon|
     ARM
     REQUIRE8
     PRESERVE8
@@ -283,4 +284,52 @@ loop_h
     bx                  lr
     ENDP                ; |vp9_h_predictor_32x32_neon|
 
+;void vp9_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
+;                                const uint8_t *above,
+;                                const uint8_t *left)
+; r0  uint8_t *dst
+; r1  ptrdiff_t y_stride
+; r2  const uint8_t *above
+; r3  const uint8_t *left
+
+|vp9_tm_predictor_4x4_neon| PROC
+    ; Load ytop_left = above[-1];
+    sub                 r12, r2, #1
+    ldrb                r12, [r12]
+    vdup.u8             d0, r12
+
+    ; Load above 4 pixels
+    vld1.32             {d2[0]}, [r2]
+
+    ; Compute above - ytop_left
+    vsubl.u8            q3, d2, d0
+
+    ; Load left row by row and compute left + (above - ytop_left)
+    ; 1st row and 2nd row
+    ldrb                r12, [r3], #1
+    ldrb                r2, [r3], #1
+    vdup.u16            q1, r12
+    vdup.u16            q2, r2
+    vadd.s16            q1, q1, q3
+    vadd.s16            q2, q2, q3
+    vqshrun.s16         d0, q1, #0
+    vqshrun.s16         d1, q2, #0
+    vst1.32             {d0[0]}, [r0], r1
+    vst1.32             {d1[0]}, [r0], r1
+
+    ; 3rd row and 4th row
+    ldrb                r12, [r3], #1
+    ldrb                r2, [r3], #1
+    vdup.u16            q1, r12
+    vdup.u16            q2, r2
+    vadd.s16            q1, q1, q3
+    vadd.s16            q2, q2, q3
+    vqshrun.s16         d0, q1, #0
+    vqshrun.s16         d1, q2, #0
+    vst1.32             {d0[0]}, [r0], r1
+    vst1.32             {d1[0]}, [r0], r1
+
+    bx                  lr
+    ENDP                ; |vp9_tm_predictor_4x4_neon|
+
+
     END
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index cc571072fc..a32f98aa50 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -57,7 +57,7 @@ prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint
 specialize vp9_v_predictor_4x4 $sse_x86inc neon
 
 prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_4x4 $sse_x86inc dspr2
+specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2
 
 prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
 specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2
-- 
GitLab