Skip to content
GitLab
Explore
Projects
Groups
Topics
Snippets
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
BC
public
external
libvpx
Commits
6a602490
Commit
6a602490
authored
11 years ago
by
Ronald S. Bultje
Committed by
Gerrit Code Review
11 years ago
Browse files
Options
Download
Plain Diff
Merge "SSE/SSE2 assembly for 4x4/8x8/16x16/32x32 TM intra prediction."
parents
865ca766
8dade638
v1.14.0-linphone
1.4.X
experimental
feature/update_to_v1.9.0-linphone
feature/uwp_nuget
forest
frame_parallel
highbitdepth
indianrunnerduck
javanwhistlingduck
khakicampbell
linphone
linphone-android
linphone-old
longtailedduck
m31-baseline
m49-2623
m52-2743
m54-2840
m56-2924
m66-3359
m68-3440
mandarinduck
mcw
mcw2
nextgen
nextgenv2
pcs-2013
playground
sandbox/Jingning/experimental
sandbox/Jingning/transcode
sandbox/Jingning/vpx
sandbox/aconverse@google.com/ansbench
sandbox/debargha/playground
sandbox/hkuang/frame_parallel
sandbox/hkuang@google.com/decode
sandbox/jimbankoski@google.com/proposed-aom
sandbox/jingning@google.com/decoder_test_suite
sandbox/jingning@google.com/experimental
sandbox/jzern@google.com/test
sandbox/wangch@google.com/vp9
sandbox/yaowu@google.com/mergeaom
stable-vp9-decoder
v1.12.0-linphone
v1.6.1_linphone
v1.7.0-linphone
v1.9.0-linphone
v1.9.0
v1.9.0-rc1
v1.8.2
v1.8.1
v1.8.0
v1.7.0
v1.6.1
v1.6.0
v1.5.0
v1.4.0
v1.3.0
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
vp9/common/vp9_rtcd_defs.sh
+6
-6
vp9/common/vp9_rtcd_defs.sh
vp9/common/x86/vp9_intrapred_sse2.asm
+151
-0
vp9/common/x86/vp9_intrapred_sse2.asm
with
157 additions
and
6 deletions
vp9/common/vp9_rtcd_defs.sh
+
6
−
6
View file @
6a602490
...
...
@@ -22,6 +22,8 @@ EOF
}
forward_decls vp9_common_forward_decls
[
$arch
=
"x86_64"
]
&&
mmx_x86_64
=
mmx
&&
sse2_x86_64
=
sse2
#
# Dequant
#
...
...
@@ -77,7 +79,7 @@ prototype void vp9_v_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint
specialize vp9_v_predictor_4x4 sse
prototype void vp9_tm_predictor_4x4
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_tm_predictor_4x4
specialize vp9_tm_predictor_4x4
sse
prototype void vp9_dc_predictor_4x4
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_dc_predictor_4x4 sse
...
...
@@ -116,7 +118,7 @@ prototype void vp9_v_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint
specialize vp9_v_predictor_8x8 sse
prototype void vp9_tm_predictor_8x8
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_tm_predictor_8x8
specialize vp9_tm_predictor_8x8
sse2
prototype void vp9_dc_predictor_8x8
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_dc_predictor_8x8 sse
...
...
@@ -155,7 +157,7 @@ prototype void vp9_v_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_v_predictor_16x16 sse2
prototype void vp9_tm_predictor_16x16
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_tm_predictor_16x16
specialize vp9_tm_predictor_16x16
sse2
prototype void vp9_dc_predictor_16x16
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_dc_predictor_16x16 sse2
...
...
@@ -194,7 +196,7 @@ prototype void vp9_v_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_v_predictor_32x32 sse2
prototype void vp9_tm_predictor_32x32
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_tm_predictor_32x32
specialize vp9_tm_predictor_32x32
sse2_x86_64
prototype void vp9_dc_predictor_32x32
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_dc_predictor_32x32 sse2
...
...
@@ -362,8 +364,6 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
# variance
[
$arch
=
"x86_64"
]
&&
mmx_x86_64
=
mmx
&&
sse2_x86_64
=
sse2
prototype unsigned int vp9_variance32x16
"const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance32x16 sse2
...
...
This diff is collapsed.
Click to expand it.
vp9/common/x86/vp9_intrapred_sse2.asm
+
151
−
0
View file @
6a602490
...
...
@@ -188,3 +188,154 @@ cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
dec
nlines4d
jnz
.loop
REP_RET
;------------------------------------------------------------------------------
; tm_predictor_4x4(dst, stride, above, left)
;
; Writes a 4x4 block where every output byte is
;     saturate_u8(left[y] + above[x] - above[-1])
; Two rows are produced per loop iteration.
;
; Register roles:
;   m1 = all zero (used to widen bytes to words)
;   m0 = above[0..3] widened to words, minus above[-1]
;   m2 = scratch / even row, m3 = scratch / odd row
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal tm_predictor_4x4, 4, 4, 4, dst, stride, above, left
  pxor                  m1, m1                  ; m1 = 0
  movd                  m2, [aboveq-1]          ; low byte = above[-1] (top-left)
  movd                  m0, [aboveq]            ; above[0..3]
  punpcklbw             m2, m1                  ; bytes -> words
  punpcklbw             m0, m1                  ; bytes -> words
  pshufw                m2, m2, 0x0             ; broadcast word 0 (above[-1])

  ; The third argument register is re-labelled: "above" is no longer
  ; needed once m0/m2 are loaded, so it becomes the row-pair counter.
  DEFINE_ARGS dst, stride, line, left
  mov                lineq, -2                  ; counts -2, -1, then 0 ends the loop
  add                leftq, 4                   ; so leftq+lineq*2 starts at left[0]
  psubw                 m0, m2                  ; m0 = above[x] - above[-1]

.loop:
  movd                  m2, [leftq+lineq*2]     ; low byte = left[y]
  movd                  m3, [leftq+lineq*2+1]   ; low byte = left[y+1]
  punpcklbw             m2, m1                  ; bytes -> words
  punpcklbw             m3, m1
  pshufw                m2, m2, 0x0             ; broadcast left[y]
  pshufw                m3, m3, 0x0             ; broadcast left[y+1]
  paddw                 m2, m0                  ; row y   (16-bit, may be <0 or >255)
  paddw                 m3, m0                  ; row y+1
  packuswb              m2, m2                  ; unsigned-saturate words to bytes
  packuswb              m3, m3
  movd              [dstq], m2                  ; store 4 pixels of row y
  movd      [dstq+strideq], m3                  ; store 4 pixels of row y+1
  lea                 dstq, [dstq+strideq*2]    ; advance two rows
  inc                lineq
  jnz .loop
  REP_RET
;------------------------------------------------------------------------------
; tm_predictor_8x8(dst, stride, above, left)
;
; Writes an 8x8 block where every output byte is
;     saturate_u8(left[y] + above[x] - above[-1])
; Two rows are produced per loop iteration and packed into one XMM register
; (row y in the low 8 bytes, row y+1 in the high 8 bytes).
;
; Register roles:
;   m1 = all zero (used to widen bytes to words)
;   m0 = above[0..7] widened to words, minus above[-1]
;   m2 = scratch / even row, m3 = scratch / odd row
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal tm_predictor_8x8, 4, 4, 4, dst, stride, above, left
  pxor                  m1, m1                  ; m1 = 0
  movd                  m2, [aboveq-1]          ; low byte = above[-1] (top-left)
  movq                  m0, [aboveq]            ; above[0..7]
  punpcklbw             m2, m1                  ; bytes -> words
  punpcklbw             m0, m1                  ; bytes -> words
  pshuflw               m2, m2, 0x0             ; broadcast word 0 across low 4 words

  ; The third argument register is re-labelled: "above" is no longer
  ; needed once m0/m2 are loaded, so it becomes the row-pair counter.
  DEFINE_ARGS dst, stride, line, left
  mov                lineq, -4                  ; 4 iterations x 2 rows
  punpcklqdq            m2, m2                  ; ...and across the high 4 words
  add                leftq, 8                   ; so leftq+lineq*2 starts at left[0]
  psubw                 m0, m2                  ; m0 = above[x] - above[-1]

.loop:
  movd                  m2, [leftq+lineq*2]     ; low byte = left[y]
  movd                  m3, [leftq+lineq*2+1]   ; low byte = left[y+1]
  punpcklbw             m2, m1                  ; bytes -> words
  punpcklbw             m3, m1
  pshuflw               m2, m2, 0x0             ; broadcast left[y]   (low half)
  pshuflw               m3, m3, 0x0             ; broadcast left[y+1] (low half)
  punpcklqdq            m2, m2                  ; broadcast to all 8 words
  punpcklqdq            m3, m3
  paddw                 m2, m0                  ; row y   as 16-bit sums
  paddw                 m3, m0                  ; row y+1 as 16-bit sums
  packuswb              m2, m3                  ; saturate: low 8 B = row y, high 8 B = row y+1
  movq              [dstq], m2                  ; store row y
  movhps    [dstq+strideq], m2                  ; store row y+1 from the high half
  lea                 dstq, [dstq+strideq*2]    ; advance two rows
  inc                lineq
  jnz .loop
  REP_RET
;------------------------------------------------------------------------------
; tm_predictor_16x16(dst, stride, above, left)
;
; Writes a 16x16 block where every output byte is
;     saturate_u8(left[y] + above[x] - above[-1])
; Two rows are produced per loop iteration.
;
; Register roles:
;   m1 = all zero (used to widen bytes to words)
;   m0 = above[0..7]  as words, minus above[-1]
;   m4 = above[8..15] as words, minus above[-1]
;   m2/m3 = broadcast left[y] / left[y+1]; m5/m6 = packed output rows
;
; NOTE(review): above and dst are accessed with mova; under INIT_XMM that is
; presumably an aligned 16-byte access, so both pointers (and stride) would
; need 16-byte alignment -- confirm against callers.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal tm_predictor_16x16, 4, 4, 7, dst, stride, above, left
  pxor                  m1, m1                  ; m1 = 0
  movd                  m2, [aboveq-1]          ; low byte = above[-1] (top-left)
  mova                  m0, [aboveq]            ; above[0..15]
  punpcklbw             m2, m1                  ; bytes -> words
  punpckhbw             m4, m0, m1              ; m4 = above[8..15] as words
  punpcklbw             m0, m1                  ; m0 = above[0..7]  as words
  pshuflw               m2, m2, 0x0             ; broadcast word 0 across low 4 words

  ; The third argument register is re-labelled: "above" is no longer
  ; needed once m0/m4/m2 are loaded, so it becomes the row-pair counter.
  DEFINE_ARGS dst, stride, line, left
  mov                lineq, -8                  ; 8 iterations x 2 rows
  punpcklqdq            m2, m2                  ; ...and across the high 4 words
  add                leftq, 16                  ; so leftq+lineq*2 starts at left[0]
  psubw                 m0, m2                  ; above[0..7]  - above[-1]
  psubw                 m4, m2                  ; above[8..15] - above[-1]

.loop:
  movd                  m2, [leftq+lineq*2]     ; low byte = left[y]
  movd                  m3, [leftq+lineq*2+1]   ; low byte = left[y+1]
  punpcklbw             m2, m1                  ; bytes -> words
  punpcklbw             m3, m1
  pshuflw               m2, m2, 0x0             ; broadcast left[y]   (low half)
  pshuflw               m3, m3, 0x0             ; broadcast left[y+1] (low half)
  punpcklqdq            m2, m2                  ; broadcast to all 8 words
  punpcklqdq            m3, m3
  paddw                 m5, m2, m0              ; row y,   columns 0..7
  paddw                 m6, m3, m0              ; row y+1, columns 0..7
  paddw                 m2, m4                  ; row y,   columns 8..15
  paddw                 m3, m4                  ; row y+1, columns 8..15
  packuswb              m5, m2                  ; saturate -> 16 bytes of row y
  packuswb              m6, m3                  ; saturate -> 16 bytes of row y+1
  mova              [dstq], m5
  mova      [dstq+strideq], m6
  lea                 dstq, [dstq+strideq*2]    ; advance two rows
  inc                lineq
  jnz .loop
  REP_RET
;------------------------------------------------------------------------------
; tm_predictor_32x32(dst, stride, above, left)          [x86-64 builds only]
;
; Writes a 32x32 block where every output byte is
;     saturate_u8(left[y] + above[x] - above[-1])
; Two rows are produced per loop iteration. The routine uses registers
; m0..m9, which is why it is only assembled when ARCH_X86_64 is set.
;
; Register roles:
;   m1 = all zero (used to widen bytes to words)
;   m0 = above[0..7],  m3 = above[8..15],
;   m4 = above[16..23], m5 = above[24..31]   (each as words, minus above[-1])
;   m2/m6 = broadcast left[y] / left[y+1]; m7/m8/m9 = temporaries
;
; NOTE(review): above and dst are accessed with mova; under INIT_XMM that is
; presumably an aligned 16-byte access, so both pointers (and stride) would
; need 16-byte alignment -- confirm against callers.
;------------------------------------------------------------------------------
%if ARCH_X86_64
INIT_XMM sse2
cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
  pxor                  m1, m1                  ; m1 = 0
  movd                  m2, [aboveq-1]          ; low byte = above[-1] (top-left)
  mova                  m0, [aboveq]            ; above[0..15]
  mova                  m4, [aboveq+16]         ; above[16..31]
  punpcklbw             m2, m1                  ; bytes -> words
  punpckhbw             m3, m0, m1              ; m3 = above[8..15]  as words
  punpckhbw             m5, m4, m1              ; m5 = above[24..31] as words
  punpcklbw             m0, m1                  ; m0 = above[0..7]   as words
  punpcklbw             m4, m1                  ; m4 = above[16..23] as words
  pshuflw               m2, m2, 0x0             ; broadcast word 0 across low 4 words

  ; The third argument register is re-labelled: "above" is no longer
  ; needed once the row above is loaded, so it becomes the row-pair counter.
  DEFINE_ARGS dst, stride, line, left
  mov                lineq, -16                 ; 16 iterations x 2 rows
  punpcklqdq            m2, m2                  ; ...and across the high 4 words
  add                leftq, 32                  ; so leftq+lineq*2 starts at left[0]
  psubw                 m0, m2                  ; above[x] - above[-1], x =  0..7
  psubw                 m3, m2                  ;                       x =  8..15
  psubw                 m4, m2                  ;                       x = 16..23
  psubw                 m5, m2                  ;                       x = 24..31

.loop:
  movd                  m2, [leftq+lineq*2]     ; low byte = left[y]
  movd                  m6, [leftq+lineq*2+1]   ; low byte = left[y+1]
  punpcklbw             m2, m1                  ; bytes -> words
  punpcklbw             m6, m1
  pshuflw               m2, m2, 0x0             ; broadcast left[y]   (low half)
  pshuflw               m6, m6, 0x0             ; broadcast left[y+1] (low half)
  punpcklqdq            m2, m2                  ; broadcast to all 8 words
  punpcklqdq            m6, m6

  ; ---- row y ----
  paddw                 m7, m2, m0              ; columns  0..7
  paddw                 m8, m2, m3              ; columns  8..15
  paddw                 m9, m2, m4              ; columns 16..23
  paddw                 m2, m5                  ; columns 24..31
  packuswb              m7, m8                  ; saturate -> bytes  0..15
  packuswb              m9, m2                  ; saturate -> bytes 16..31

  ; ---- row y+1 (interleaved with the stores of row y) ----
  paddw                 m2, m6, m0              ; columns  0..7
  paddw                 m8, m6, m3              ; columns  8..15
  mova              [dstq], m7                  ; row y, bytes 0..15 (frees m7)
  paddw                 m7, m6, m4              ; columns 16..23
  paddw                 m6, m5                  ; columns 24..31
  mova           [dstq+16], m9                  ; row y, bytes 16..31
  packuswb              m2, m8                  ; saturate -> bytes  0..15
  packuswb              m7, m6                  ; saturate -> bytes 16..31
  mova      [dstq+strideq], m2                  ; row y+1, bytes 0..15
  mova   [dstq+strideq+16], m7                  ; row y+1, bytes 16..31

  lea                 dstq, [dstq+strideq*2]    ; advance two rows
  inc                lineq
  jnz .loop
  REP_RET
%endif
This diff is collapsed.
Click to expand it.
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment
Menu
Explore
Projects
Groups
Topics
Snippets