Skip to content
GitLab
Explore
Projects
Groups
Topics
Snippets
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
BC
public
external
libvpx
Commits
a522be29
Commit
a522be29
authored
14 years ago
by
Johann
Committed by
Code Review
14 years ago
Browse files
Options
Download
Plain Diff
Merge "fix armv6 simpleloop filter"
parents
6ea5bb85
467a0b99
v1.14.0-linphone
1.4.X
aylesbury
bali
cayuga
eider
experimental
feature/update_to_v1.9.0-linphone
feature/uwp_nuget
forest
frame_parallel
highbitdepth
indianrunnerduck
javanwhistlingduck
khakicampbell
linphone
linphone-android
linphone-old
longtailedduck
m29-baseline
m31-baseline
m49-2623
m52-2743
m54-2840
m56-2924
m66-3359
m68-3440
mandarinduck
mcw
mcw2
nextgen
nextgenv2
pcs-2013
playground
sandbox/Jingning/experimental
sandbox/Jingning/transcode
sandbox/Jingning/vpx
sandbox/aconverse@google.com/ansbench
sandbox/atna/dec_sem_sync
sandbox/awatry/initial_opencl_implementation
sandbox/debargha/playground
sandbox/hkuang/frame_parallel
sandbox/hkuang@google.com/decode
sandbox/hlundin/error-concealment
sandbox/holmer/error-concealment
sandbox/jimbankoski@google.com/proposed-aom
sandbox/jingning@google.com/decoder_test_suite
sandbox/jingning@google.com/experimental
sandbox/jkoleszar/cached-multibit
sandbox/jkoleszar/experimental-knobs
sandbox/jkoleszar/new-rate-control
sandbox/jkoleszar/new-rtcd
sandbox/jkoleszar/reuse-modemv
sandbox/jkoleszar/use-memcpy
sandbox/jzern@google.com/test
sandbox/slavarnway/test
sandbox/wangch@google.com/vp9
sandbox/yaowu@google.com/mergeaom
stable-vp9-decoder
v1.12.0-linphone
v1.6.1_linphone
v1.7.0-linphone
v1.9.0-linphone
vp9-preview
v1.9.0
v1.9.0-rc1
v1.8.2
v1.8.1
v1.8.0
v1.7.0
v1.6.1
v1.6.0
v1.5.0
v1.4.0
v1.3.0
v1.2.0
v1.1.0
v1.0.0
v0.9.7
v0.9.7-p1
v0.9.6
v0.9.5
v0.9.2
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
vp8/common/arm/armv6/simpleloopfilter_v6.asm
+47
-68
vp8/common/arm/armv6/simpleloopfilter_v6.asm
with
47 additions
and
68 deletions
vp8/common/arm/armv6/simpleloopfilter_v6.asm
+
47
−
68
View file @
a522be29
...
...
@@ -63,23 +63,22 @@ pstep RN r1
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb
sp
!
,
{
r4
-
r11
,
lr
}
sub
src
,
src
,
pstep
,
lsl
#
1
; move src pointer down by 2 lines
ldr
r12
,
[
r3
]
; limit
ldr
r3
,
[
src
,
-
pstep
,
lsl
#
1
]
; p1
ldr
r
12
,
[
r3
]
,
#
4
;
limit
ldr
r
3
,
[
src
]
,
pstep
; p
1
ldr
r
9
,
[
sp
,
#
4
0
]
;
count for 8-in-parallel
ldr
r
4
,
[
src
,
-
pstep
]
; p
0
ldr
r9
,
[
sp
,
#
36
]
; count for 8-in-parallel
ldr
r4
,
[
src
],
pstep
; p0
ldr
r7
,
[
r2
],
#
4
; flimit
ldr
r5
,
[
src
],
pstep
; q0
ldr
r7
,
[
r2
]
; flimit
ldr
r5
,
[
src
]
; q0
ldr
r2
,
c0x80808080
ldr
r6
,
[
src
]
; q1
ldr
r6
,
[
src
,
pstep
]
; q1
uadd8
r7
,
r7
,
r7
; flimit * 2
mov
r9
,
r9
,
lsl
#
1
;
4-in-parallel
mov
r9
,
r9
,
lsl
#
1
;
double the count. we're doing 4 at a time
uadd8
r12
,
r7
,
r12
; flimit * 2 + limit
mov
lr
,
#
0
|
simple_hnext8
|
; vp8_simple_filter_mask() function
...
...
@@ -89,22 +88,19 @@ pstep RN r1
uqsub8
r10
,
r4
,
r5
; p0 - q0
uqsub8
r11
,
r5
,
r4
; q0 - p0
orr
r8
,
r8
,
r7
; abs(p1 - q1)
ldr
lr
,
c0x7F7F7F7F
; 01111111 mask
orr
r10
,
r10
,
r11
; abs(p0 - q0)
and
r8
,
lr
,
r8
,
l
s
r
#
1
; abs(p1 - q
1
)
/ 2
uhadd8
r8
,
r8
,
lr
; abs(p1 - q
1
)
>> 1
uqadd8
r10
,
r10
,
r10
; abs(p0 - q0) * 2
mvn
lr
,
#
0
; lr == -1
; STALL waiting on r10
uqadd8
r10
,
r10
,
r8
; abs(p0 - q0)*2 + abs(p1 - q1)/2
; STALL waiting on r10 :(
uqsub8
r10
,
r10
,
r12
; compare to flimit
mov
r8
,
#
0
usub8
r10
,
r8
,
r10
; use usub8 instead of ssub8
; STALL (maybe?) when are flags set? :/
sel
r10
,
lr
,
r8
; filter mask: lr
; STALL waiting on r10
mvn
r8
,
#
0
uqsub8
r10
,
r10
,
r12
; compare to flimit. need to do this twice because uqsub8 doesn't set GE flags
; and usub8 doesn't saturate
usub8
r10
,
lr
,
r10
; set GE flags for each byte
sel
r10
,
r8
,
lr
; filter mask: F or 0
cmp
r10
,
#
0
beq
si
mple_hskip_filter
; skip filtering
beq
si
mple_hskip_filter
; skip filtering
if we're &ing with 0s. would just write out the same values
;vp8_simple_filter() function
...
...
@@ -113,55 +109,45 @@ pstep RN r1
eor
r4
,
r4
,
r2
; p0 offset to convert to a signed value
eor
r5
,
r5
,
r2
; q0 offset to convert to a signed value
qsub8
r3
,
r3
,
r6
; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
qsub8
r6
,
r5
,
r4
; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
qsub8
r3
,
r3
,
r6
; vp8_signed_char_clamp(p1-q1)
qsub8
r6
,
r5
,
r4
; vp8_signed_char_clamp(q0-p0)
qadd8
r3
,
r3
,
r6
; += q0-p0
qadd8
r3
,
r3
,
r6
; += q0-p0
qadd8
r3
,
r3
,
r6
; p1-q1 + 3*(q0-p0))
and
r3
,
r3
,
r10
; &= mask
qadd8
r3
,
r3
,
r6
ldr
r8
,
c0x03030303
; r8 = 3
qadd8
r3
,
r3
,
r6
ldr
r7
,
c0x04040404
qadd8
r3
,
r3
,
r6
and
r3
,
r3
,
lr
; vp8_filter &= mask;
ldr
r8
,
c0x03030303
;save bottom 3 bits so that we round one side +4 and the other +3
qadd8
r7
,
r3
,
r7
; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
qadd8
r8
,
r3
,
r8
; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
qadd8
r3
,
r3
,
r7
; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
mov
r
7
,
#
0
shadd8
r
8
,
r
8
,
r
7
; Filter2 >>=
3
shadd8
r
3
,
r
3
,
r
7
; Filter1 >>=
3
shadd8
r
8
,
r
8
,
r
7
shadd8
r
3
,
r
3
,
r
7
shadd8
r
8
,
r
8
,
r
7
;
r8:
Filter
2
shadd8
r
3
,
r
3
,
r
7
;
r7: filter1
mov
r
3
,
#
0
shadd8
r
7
,
r
7
,
r3
shadd8
r
8
,
r
8
,
r3
shadd8
r
7
,
r
7
,
r
3
shadd8
r
8
,
r
8
,
r
3
shadd8
r
7
,
r
7
,
r
3
; Filter
1 >>= 3
shadd8
r
8
,
r
8
,
r
3
;
Filter2 >>= 3
;calculate output
sub
src
,
src
,
pstep
,
lsl
#
1
qsub8
r5
,
r5
,
r7
; u = vp8_signed_char_clamp(q0 - Filter1)
qadd8
r4
,
r4
,
r8
; u = vp8_signed_char_clamp(p0 + Filter2)
qsub8
r5
,
r5
,
r3
; u = vp8_signed_char_clamp(q0 - Filter1)
eor
r4
,
r4
,
r2
; *op0 = u^0x80
str
r4
,
[
src
],
pstep
; store op0 result
eor
r5
,
r5
,
r2
; *oq0 = u^0x80
str
r5
,
[
src
],
pstep
; store oq0 result
str
r5
,
[
src
]
; store oq0 result
eor
r4
,
r4
,
r2
; *op0 = u^0x80
str
r4
,
[
src
,
-
pstep
]
; store op0 result
|
simple_hskip_filter
|
add
src
,
src
,
#
4
sub
src
,
src
,
pstep
sub
src
,
src
,
pstep
,
lsl
#
1
subs
r9
,
r9
,
#
1
addne
src
,
src
,
#
4
; next row
;pld [src]
;pld [src, pstep]
;pld [src, pstep, lsl #1]
ldrne
r3
,
[
src
],
pstep
; p1
ldrne
r4
,
[
src
],
pstep
; p0
ldrne
r5
,
[
src
],
pstep
; q0
ldrne
r6
,
[
src
]
; q1
ldrne
r3
,
[
src
,
-
pstep
,
lsl
#
1
]
; p1
ldrne
r4
,
[
src
,
-
pstep
]
; p0
ldrne
r5
,
[
src
]
; q0
ldrne
r6
,
[
src
,
pstep
]
; q1
bne
si
mple_hnext8
...
...
@@ -174,9 +160,9 @@ pstep RN r1
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb
sp
!
,
{
r4
-
r11
,
lr
}
ldr
r12
,
[
r2
]
,
#
4
; r12: flimit
ldr
r12
,
[
r2
]
; r12: flimit
ldr
r2
,
c0x80808080
ldr
r7
,
[
r3
]
,
#
4
; limit
ldr
r7
,
[
r3
]
; limit
; load source data to r7, r8, r9, r10
ldrh
r3
,
[
src
,
#
-
2
]
...
...
@@ -213,16 +199,15 @@ pstep RN r1
uqsub8
r10
,
r5
,
r4
; q0 - p0
orr
r7
,
r7
,
r8
; abs(p1 - q1)
orr
r9
,
r9
,
r10
; abs(p0 - q0)
ldr
lr
,
c0x7F7F7F7F
; 0111 1111 mask
uqadd8
r9
,
r9
,
r9
; abs(p0 - q0) * 2
and
r7
,
lr
,
r7
,
lsr
#
1
; abs(p1 - q1) / 2
mov
r8
,
#
0
uqadd8
r9
,
r9
,
r9
; abs(p0 - q0) * 2
uhadd8
r7
,
r7
,
r8
; abs(p1 - q1) / 2
uqadd8
r7
,
r7
,
r9
; abs(p0 - q0)*2 + abs(p1 - q1)/2
mvn
r10
,
#
0
; r10 == -1
uqsub8
r7
,
r7
,
r12
; compare to flimit
usub8
r7
,
r8
,
r7
sel
r
7
,
r10
,
r8
; filter mask
: lr
sel
l
r
,
r10
,
r8
; filter mask
cmp
lr
,
#
0
beq
si
mple_vskip_filter
; skip filtering
...
...
@@ -286,10 +271,6 @@ pstep RN r1
|
simple_vskip_filter
|
subs
r11
,
r11
,
#
1
;pld [src]
;pld [src, pstep]
;pld [src, pstep, lsl #1]
; load source data to r7, r8, r9, r10
ldrneh
r3
,
[
src
,
#
-
2
]
ldrneh
r4
,
[
src
],
pstep
...
...
@@ -316,7 +297,5 @@ pstep RN r1
c0x80808080
DCD
0x80808080
c0x03030303
DCD
0x03030303
c0x04040404
DCD
0x04040404
c0x01010101
DCD
0x01010101
c0x7F7F7F7F
DCD
0x7F7F7F7F
END
This diff is collapsed.
Click to expand it.
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment
Menu
Explore
Projects
Groups
Topics
Snippets