diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_neon.asm index d6f1f0b9c0fa0c3cecd24d3819fc5749dc4c86a2..4fe1a6ac640b7680c5855dbf4be1bd06cf47bbd8 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_neon.asm +++ b/vp9/common/arm/neon/vp9_loopfilter_neon.asm @@ -38,6 +38,7 @@ push {lr} ldr r12, [sp,#8] ; load count + add r1, r1, r1 ; r1 = 2*pitch, computed once ahead of count_lf_h_loop cmp r12, #0 beq end_vp9_lf_h_edge @@ -47,9 +48,8 @@ vld1.8 {d2[]}, [r2] ; duplicate *thresh count_lf_h_loop - sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines - add r3, r2, r1 - add r1, r1, r1 + sub r2, r0, r1, lsl #1 ; r2 = 4 lines before src (r0), r1 is 2*pitch so r1 lsl #1 is 4*pitch + add r3, r2, r1, lsr #1 ; r3 = r2 advanced by one line (3 lines before src), r1 lsr #1 is pitch again assuming pitch is non-negative (TODO confirm) vld1.u8 {d3}, [r2@64], r1 ; p3 vld1.u8 {d4}, [r3@64], r1 ; p2 @@ -278,6 +278,7 @@ end_vp9_lf_v_edge push {r4-r5, lr} ldr r12, [sp,#16] ; load count + add r1, r1, r1 ; r1 = 2*pitch, computed once ahead of count_mblf_h_loop cmp r12, #0 beq end_vp9_mblf_h_edge @@ -287,9 +288,8 @@ end_vp9_lf_v_edge vld1.8 {d2[]}, [r2] ; duplicate *thresh count_mblf_h_loop - sub r3, r0, r1, lsl #2 ; move src pointer down by 4 lines - add r2, r3, r1 - add r1, r1, r1 + sub r3, r0, r1, lsl #1 ; r3 = 4 lines before src (r0), r1 is 2*pitch so r1 lsl #1 is 4*pitch + add r2, r3, r1, lsr #1 ; r2 = r3 advanced by one line (3 lines before src), r1 lsr #1 is pitch again assuming pitch is non-negative (TODO confirm) vld1.u8 {d3}, [r3@64], r1 ; p3 vld1.u8 {d4}, [r2@64], r1 ; p2