Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
libvpx
Commits
ba8fc719
Commit
ba8fc719
authored
Aug 26, 2013
by
hkuang
Committed by
Gerrit Code Review
Aug 26, 2013
Browse files
Merge "Add neon optimize vp9_short_idct4x4_1_add."
parents
bbb490f6
69384f4f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
70 additions
and
1 deletion
+70
-1
vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
+68
-0
vp9/common/vp9_rtcd_defs.sh
vp9/common/vp9_rtcd_defs.sh
+1
-1
vp9/vp9_common.mk
vp9/vp9_common.mk
+1
-0
No files found.
vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
0 → 100644
View file @
ba8fc719
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
; Assembler set-up for this file (armasm syntax).
    EXPORT  |vp9_short_idct4x4_1_add_neon|  ; make the routine visible to the linker
    ARM                                     ; assemble the following code as ARM instructions
    REQUIRE8                                ; 8-byte stack alignment attributes
    PRESERVE8                               ;   (this file never touches sp)

    ; Code goes into the read-only .text area; ALIGN=2 is the area alignment
    ; expressed as a power of two.
    AREA ||.text||, CODE, READONLY, ALIGN=2
; void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
;                                   int dest_stride)
;
; DC-only 4x4 inverse transform and add: one value a1 is derived from
; input[0] and added to each of the 16 pixels of the 4x4 block at dest.
; The sums are saturated to the unsigned 8-bit range before being stored.
;
;   r0  int16_t *input        (only input[0] is read)
;   r1  uint8_t *dest
;   r2  int      dest_stride  (byte step from one row to the next)
;
; Scratch registers: r0, r1, r3, r12, q0, q1 (d2/d3), q2 (d4/d5), q8, q9
|vp9_short_idct4x4_1_add_neon| PROC
    ldrsh           r0, [r0]                ; r0 = input[0], sign-extended
    mov             r3, r1                  ; r3 walks the rows for the loads;
                                            ; r1 stays at row 0 for the stores

    ; r12 = cospi_16_64 = 11585, built as 0x2d00 + 0x41
    mov             r12, #0x2d00
    add             r12, r12, #0x41

    ; out = dct_const_round_shift(input[0] * cospi_16_64)
    mul             r0, r0, r12
    add             r0, r0, #0x2000         ; + (1 << (DCT_CONST_BITS - 1))
    asr             r0, r0, #14             ; >> DCT_CONST_BITS

    ; out = dct_const_round_shift(out * cospi_16_64)
    mul             r0, r0, r12
    add             r0, r0, #0x2000         ; + (1 << (DCT_CONST_BITS - 1))
    asr             r0, r0, #14             ; >> DCT_CONST_BITS

    ; a1 = ROUND_POWER_OF_TWO(out, 4)
    add             r0, r0, #8              ; + (1 << (4 - 1))
    asr             r0, r0, #4              ; >> 4

    vdup.s16        q0, r0                  ; a1 in all eight 16-bit lanes

    ; Gather the four 4-byte rows: rows 0/1 into d2, rows 2/3 into d3.
    vld1.32         {d2[0]}, [r3], r2
    vld1.32         {d2[1]}, [r3], r2
    vld1.32         {d3[0]}, [r3], r2
    vld1.32         {d3[1]}, [r3]

    ; dest[x] + a1, with the pixels widened from u8 to 16 bits
    vaddw.u8        q8, q0, d2
    vaddw.u8        q9, q0, d3

    ; clip_pixel: saturating narrow from s16 to u8
    vqmovun.s16     d4, q8
    vqmovun.s16     d5, q9

    ; Write the four rows back, stepping r1 by dest_stride.
    vst1.32         {d4[0]}, [r1], r2
    vst1.32         {d4[1]}, [r1], r2
    vst1.32         {d5[0]}, [r1], r2
    vst1.32         {d5[1]}, [r1]

    bx              lr
    ENDP                                    ; |vp9_short_idct4x4_1_add_neon|
END
vp9/common/vp9_rtcd_defs.sh
View file @
ba8fc719
...
...
@@ -295,7 +295,7 @@ specialize vp9_convolve8_avg_vert ssse3 neon
# dct
#
prototype void vp9_short_idct4x4_1_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct4x4_1_add sse2
specialize vp9_short_idct4x4_1_add sse2
neon
prototype void vp9_short_idct4x4_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct4x4_add sse2 neon
...
...
vp9/vp9_common.mk
View file @
ba8fc719
...
...
@@ -96,6 +96,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_convolve8_avg_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_loopfilter_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_dc_only_idct_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct4x4_1_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct4x4_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct8x8_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct16x16_add_neon
$(ASM)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment