Commit da8eb716, authored by Johann Koenig and committed by Code Review
Browse files

Merge "adds preload for armv6 encoder asm"

parents d1c0ba8f 5305e79e
Branches
Tags
No related merge requests found
Showing with 48 additions and 1 deletion
...@@ -27,8 +27,11 @@ ...@@ -27,8 +27,11 @@
|vp8_mse16x16_armv6| PROC |vp8_mse16x16_armv6| PROC
push {r4-r9, lr} push {r4-r9, lr}
mov r12, #16 ; set loop counter to 16 (=block height)
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r12, #16 ; set loop counter to 16 (=block height)
mov r4, #0 ; initialize sse = 0 mov r4, #0 ; initialize sse = 0
loop loop
...@@ -39,8 +42,10 @@ loop ...@@ -39,8 +42,10 @@ loop
mov lr, #0 ; constant zero mov lr, #0 ; constant zero
usub8 r8, r5, r6 ; calculate difference usub8 r8, r5, r6 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r8, lr ; select bytes with positive difference sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands usub8 r9, r6, r5 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums ; calculate partial sums
......
...@@ -24,6 +24,12 @@ ...@@ -24,6 +24,12 @@
; stack max_sad (not used) ; stack max_sad (not used)
|vp8_sad16x16_armv6| PROC |vp8_sad16x16_armv6| PROC
stmfd sp!, {r4-r12, lr} stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
pld [r0, r1, lsl #1]
pld [r2, r3, lsl #1]
mov r4, #0 ; sad = 0; mov r4, #0 ; sad = 0;
mov r5, #8 ; loop count mov r5, #8 ; loop count
...@@ -45,6 +51,9 @@ loop ...@@ -45,6 +51,9 @@ loop
add r0, r0, r1 ; set src pointer to next row add r0, r0, r1 ; set src pointer to next row
add r2, r2, r3 ; set dst pointer to next row add r2, r2, r3 ; set dst pointer to next row
pld [r0, r1, lsl #1]
pld [r2, r3, lsl #1]
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
...@@ -70,6 +79,9 @@ loop ...@@ -70,6 +79,9 @@ loop
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
pld [r0, r1, lsl #1]
pld [r2, r3, lsl #1]
subs r5, r5, #1 ; decrement loop counter subs r5, r5, #1 ; decrement loop counter
add r4, r4, r8 ; add partial sad values add r4, r4, r8 ; add partial sad values
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
|vp8_variance16x16_armv6| PROC |vp8_variance16x16_armv6| PROC
stmfd sp!, {r4-r12, lr} stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r8, #0 ; initialize sum = 0 mov r8, #0 ; initialize sum = 0
mov r11, #0 ; initialize sse = 0 mov r11, #0 ; initialize sse = 0
mov r12, #16 ; set loop counter to 16 (=block height) mov r12, #16 ; set loop counter to 16 (=block height)
...@@ -37,8 +41,10 @@ loop ...@@ -37,8 +41,10 @@ loop
mov lr, #0 ; constant zero mov lr, #0 ; constant zero
usub8 r6, r4, r5 ; calculate difference usub8 r6, r4, r5 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands usub8 r9, r5, r4 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r6, r9, lr ; select bytes with negative difference sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums ; calculate partial sums
......
...@@ -23,6 +23,10 @@ ...@@ -23,6 +23,10 @@
|vp8_variance8x8_armv6| PROC |vp8_variance8x8_armv6| PROC
push {r4-r10, lr} push {r4-r10, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r12, #8 ; set loop counter to 8 (=block height) mov r12, #8 ; set loop counter to 8 (=block height)
mov r4, #0 ; initialize sum = 0 mov r4, #0 ; initialize sum = 0
mov r5, #0 ; initialize sse = 0 mov r5, #0 ; initialize sse = 0
...@@ -35,8 +39,10 @@ loop ...@@ -35,8 +39,10 @@ loop
mov lr, #0 ; constant zero mov lr, #0 ; constant zero
usub8 r8, r6, r7 ; calculate difference usub8 r8, r6, r7 ; calculate difference
pld [r0, r1, lsl #1]
sel r10, r8, lr ; select bytes with positive difference sel r10, r8, lr ; select bytes with positive difference
usub8 r9, r7, r6 ; calculate difference with reversed operands usub8 r9, r7, r6 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums ; calculate partial sums
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
|vp8_variance_halfpixvar16x16_h_armv6| PROC |vp8_variance_halfpixvar16x16_h_armv6| PROC
stmfd sp!, {r4-r12, lr} stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r8, #0 ; initialize sum = 0 mov r8, #0 ; initialize sum = 0
ldr r10, c80808080 ldr r10, c80808080
mov r11, #0 ; initialize sse = 0 mov r11, #0 ; initialize sse = 0
...@@ -42,8 +46,10 @@ loop ...@@ -42,8 +46,10 @@ loop
eor r4, r4, r10 eor r4, r4, r10
usub8 r6, r4, r5 ; calculate difference usub8 r6, r4, r5 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference sel r7, r6, lr ; select bytes with positive difference
usub8 r6, r5, r4 ; calculate difference with reversed operands usub8 r6, r5, r4 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r6, r6, lr ; select bytes with negative difference sel r6, r6, lr ; select bytes with negative difference
; calculate partial sums ; calculate partial sums
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
|vp8_variance_halfpixvar16x16_hv_armv6| PROC |vp8_variance_halfpixvar16x16_hv_armv6| PROC
stmfd sp!, {r4-r12, lr} stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r8, #0 ; initialize sum = 0 mov r8, #0 ; initialize sum = 0
ldr r10, c80808080 ldr r10, c80808080
mov r11, #0 ; initialize sse = 0 mov r11, #0 ; initialize sse = 0
...@@ -53,8 +57,10 @@ loop ...@@ -53,8 +57,10 @@ loop
eor r4, r4, r10 eor r4, r4, r10
usub8 r6, r4, r5 ; calculate difference usub8 r6, r4, r5 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference sel r7, r6, lr ; select bytes with positive difference
usub8 r6, r5, r4 ; calculate difference with reversed operands usub8 r6, r5, r4 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r6, r6, lr ; select bytes with negative difference sel r6, r6, lr ; select bytes with negative difference
; calculate partial sums ; calculate partial sums
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
|vp8_variance_halfpixvar16x16_v_armv6| PROC |vp8_variance_halfpixvar16x16_v_armv6| PROC
stmfd sp!, {r4-r12, lr} stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r8, #0 ; initialize sum = 0 mov r8, #0 ; initialize sum = 0
ldr r10, c80808080 ldr r10, c80808080
mov r11, #0 ; initialize sse = 0 mov r11, #0 ; initialize sse = 0
...@@ -43,8 +47,10 @@ loop ...@@ -43,8 +47,10 @@ loop
eor r4, r4, r10 eor r4, r4, r10
usub8 r6, r4, r5 ; calculate difference usub8 r6, r4, r5 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference sel r7, r6, lr ; select bytes with positive difference
usub8 r6, r5, r4 ; calculate difference with reversed operands usub8 r6, r5, r4 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r6, r6, lr ; select bytes with negative difference sel r6, r6, lr ; select bytes with negative difference
; calculate partial sums ; calculate partial sums
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment