Commit 664d9921 authored by Attila Nagy
Browse files

Fix: NEON copy/extend frame for small sizes

The NEON versions of copyframeyonly, extendframeborders, and copy_frame_func
did not work when plane_stride < 128 and/or y_width < 128.

Change-Id: Id6c2e6c795274da0c90134b15c0d5f62d1b17a6c
Showing with 25 additions and 13 deletions
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
AREA ||.text||, CODE, READONLY, ALIGN=2 AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); ;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc,
; YV12_BUFFER_CONFIG *dst_ybc);
|vp8_yv12_copy_frame_func_neon| PROC |vp8_yv12_copy_frame_func_neon| PROC
push {r4 - r11, lr} push {r4 - r11, lr}
...@@ -52,7 +53,8 @@ cp_src_to_dst_height_loop ...@@ -52,7 +53,8 @@ cp_src_to_dst_height_loop
mov r9, r3 mov r9, r3
add r10, r2, r6 add r10, r2, r6
add r11, r3, r7 add r11, r3, r7
mov r12, r5, lsr #7 movs r12, r5, lsr #7
ble extra_cp_needed ; y_width < 128
cp_src_to_dst_width_loop cp_src_to_dst_width_loop
vld1.8 {q0, q1}, [r8]! vld1.8 {q0, q1}, [r8]!
...@@ -83,6 +85,7 @@ cp_src_to_dst_width_loop ...@@ -83,6 +85,7 @@ cp_src_to_dst_width_loop
bne cp_src_to_dst_height_loop bne cp_src_to_dst_height_loop
extra_cp_needed
ands r10, r5, #0x7f ;check to see if extra copy is needed ands r10, r5, #0x7f ;check to see if extra copy is needed
sub r11, r5, r10 sub r11, r5, r10
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
...@@ -110,7 +113,8 @@ cp_src_to_dst_height_uv_loop ...@@ -110,7 +113,8 @@ cp_src_to_dst_height_uv_loop
mov r9, r3 mov r9, r3
add r10, r2, r6 add r10, r2, r6
add r11, r3, r7 add r11, r3, r7
mov r12, r5, lsr #6 movs r12, r5, lsr #6
ble extra_uv_cp_needed
cp_src_to_dst_width_uv_loop cp_src_to_dst_width_uv_loop
vld1.8 {q0, q1}, [r8]! vld1.8 {q0, q1}, [r8]!
...@@ -133,6 +137,7 @@ cp_src_to_dst_width_uv_loop ...@@ -133,6 +137,7 @@ cp_src_to_dst_width_uv_loop
bne cp_src_to_dst_height_uv_loop bne cp_src_to_dst_height_uv_loop
extra_uv_cp_needed
ands r10, r5, #0x3f ;check to see if extra copy is needed ands r10, r5, #0x3f ;check to see if extra copy is needed
sub r11, r5, r10 sub r11, r5, r10
ldr r2, [sp] ;srcptr1 ldr r2, [sp] ;srcptr1
......
...@@ -42,7 +42,8 @@ cp_src_to_dst_height_loop ...@@ -42,7 +42,8 @@ cp_src_to_dst_height_loop
mov r9, r3 mov r9, r3
add r10, r2, r6 add r10, r2, r6
add r11, r3, r7 add r11, r3, r7
mov r12, r5, lsr #7 movs r12, r5, lsr #7
ble extra_cp_needed ; y_width < 128
cp_src_to_dst_width_loop cp_src_to_dst_width_loop
vld1.8 {q0, q1}, [r8]! vld1.8 {q0, q1}, [r8]!
...@@ -73,6 +74,7 @@ cp_src_to_dst_width_loop ...@@ -73,6 +74,7 @@ cp_src_to_dst_width_loop
bne cp_src_to_dst_height_loop bne cp_src_to_dst_height_loop
extra_cp_needed
ands r10, r5, #0x7f ;check to see if extra copy is needed ands r10, r5, #0x7f ;check to see if extra copy is needed
sub r11, r5, r10 sub r11, r5, r10
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
...@@ -419,7 +421,8 @@ cp_src_to_dst_height_loop1 ...@@ -419,7 +421,8 @@ cp_src_to_dst_height_loop1
mov r9, r3 mov r9, r3
add r10, r2, r6 add r10, r2, r6
add r11, r3, r7 add r11, r3, r7
mov r12, r5, lsr #7 movs r12, r5, lsr #7
ble extra_copy_needed ; y_width < 128
cp_src_to_dst_width_loop1 cp_src_to_dst_width_loop1
vld1.8 {q0, q1}, [r8]! vld1.8 {q0, q1}, [r8]!
...@@ -450,6 +453,7 @@ cp_src_to_dst_width_loop1 ...@@ -450,6 +453,7 @@ cp_src_to_dst_width_loop1
bne cp_src_to_dst_height_loop1 bne cp_src_to_dst_height_loop1
extra_copy_needed
ands r10, r5, #0x7f ;check to see if extra copy is needed ands r10, r5, #0x7f ;check to see if extra copy is needed
sub r11, r5, r10 sub r11, r5, r10
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
......
...@@ -75,12 +75,13 @@ copy_left_right_y ...@@ -75,12 +75,13 @@ copy_left_right_y
mul r8, r4, lr ; plane_height * plane_stride mul r8, r4, lr ; plane_height * plane_stride
; copy width is plane_stride ; copy width is plane_stride
mov r12, lr, lsr #7 ; plane_stride / 128 movs r12, lr, lsr #7 ; plane_stride / 128
sub r1, r1, #32 ; src_ptr1 = y_buffer - Border sub r1, r1, #32 ; src_ptr1 = y_buffer - Border
add r6, r1, r8 ; dest_ptr2 = src_ptr2 - plane_stride (src_ptr1 + (plane_height * plane_stride)) add r6, r1, r8 ; dest_ptr2 = src_ptr2 - plane_stride (src_ptr1 + (plane_height * plane_stride))
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
ble extra_y_copy_needed ; plane stride < 128
copy_top_bottom_y copy_top_bottom_y
vld1.8 {q0, q1}, [r1]! vld1.8 {q0, q1}, [r1]!
...@@ -119,6 +120,7 @@ top_bottom_32 ...@@ -119,6 +120,7 @@ top_bottom_32
subs r12, r12, #1 subs r12, r12, #1
bne copy_top_bottom_y bne copy_top_bottom_y
extra_y_copy_needed
mov r7, lr, lsr #4 ; check to see if extra copy is needed mov r7, lr, lsr #4 ; check to see if extra copy is needed
ands r7, r7, #0x7 ands r7, r7, #0x7
bne extra_top_bottom_y bne extra_top_bottom_y
...@@ -184,12 +186,13 @@ copy_left_right_uv ...@@ -184,12 +186,13 @@ copy_left_right_uv
;Now copy the top and bottom source lines into each line of the respective borders ;Now copy the top and bottom source lines into each line of the respective borders
mov r1, r7 mov r1, r7
mul r8, r4, lr ; plane_height * plane_stride mul r8, r4, lr ; plane_height * plane_stride
mov r12, lr, lsr #6 ; plane_stride / 64 movs r12, lr, lsr #6 ; plane_stride / 64
sub r1, r1, #16 ; src_ptr1 = u_buffer - Border sub r1, r1, #16 ; src_ptr1 = u_buffer - Border
add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride) add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
ble extra_uv_copy_needed ; plane_stride < 64
copy_top_bottom_uv copy_top_bottom_uv
vld1.8 {q0, q1}, [r1]! vld1.8 {q0, q1}, [r1]!
...@@ -219,7 +222,7 @@ top_bottom_16 ...@@ -219,7 +222,7 @@ top_bottom_16
subs r12, r12, #1 subs r12, r12, #1
bne copy_top_bottom_uv bne copy_top_bottom_uv
extra_uv_copy_needed
mov r7, lr, lsr #3 ; check to see if extra copy is needed mov r7, lr, lsr #3 ; check to see if extra copy is needed
ands r7, r7, #0x7 ands r7, r7, #0x7
bne extra_top_bottom_uv bne extra_top_bottom_uv
......
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
#include "vpx_scale/vpxscale.h" #include "vpx_scale/vpxscale.h"
void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc,
YV12_BUFFER_CONFIG *dst_ybc);
void void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) YV12_BUFFER_CONFIG *dst_ybc)
{ {
vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
//printf("Border:%d; plane_stride:%d; plane_height:%d; plane_width:%d\n",dst_ybc->border,dst_ybc->y_stride,dst_ybc->y_height,dst_ybc->y_width);
vp8_yv12_extend_frame_borders_ptr(dst_ybc); vp8_yv12_extend_frame_borders_neon(dst_ybc);
} }
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment