Skip to content
GitLab
Explore
Projects
Groups
Topics
Snippets
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
BC
public
external
libvpx
Commits
b8f43aec
Commit
b8f43aec
authored
14 years ago
by
Scott LaVarnway
Committed by
Code Review
14 years ago
Browse files
Options
Download
Plain Diff
Merge "SSSE3 version of fast quantizer"
parents
90c505f2
ff4a71f4
v1.14.0-linphone
1.4.X
bali
cayuga
eider
experimental
feature/update_to_v1.9.0-linphone
feature/uwp_nuget
forest
frame_parallel
highbitdepth
indianrunnerduck
javanwhistlingduck
khakicampbell
linphone
linphone-android
linphone-old
longtailedduck
m29-baseline
m31-baseline
m49-2623
m52-2743
m54-2840
m56-2924
m66-3359
m68-3440
mandarinduck
mcw
mcw2
nextgen
nextgenv2
pcs-2013
playground
sandbox/Jingning/experimental
sandbox/Jingning/transcode
sandbox/Jingning/vpx
sandbox/aconverse@google.com/ansbench
sandbox/atna/dec_sem_sync
sandbox/awatry/initial_opencl_implementation
sandbox/debargha/playground
sandbox/hkuang/frame_parallel
sandbox/hkuang@google.com/decode
sandbox/hlundin/error-concealment
sandbox/holmer/error-concealment
sandbox/jimbankoski@google.com/proposed-aom
sandbox/jingning@google.com/decoder_test_suite
sandbox/jingning@google.com/experimental
sandbox/jkoleszar/cached-multibit
sandbox/jkoleszar/experimental-knobs
sandbox/jkoleszar/new-rate-control
sandbox/jkoleszar/new-rtcd
sandbox/jkoleszar/reuse-modemv
sandbox/jkoleszar/use-memcpy
sandbox/jzern@google.com/test
sandbox/slavarnway/test
sandbox/wangch@google.com/vp9
sandbox/yaowu@google.com/mergeaom
stable-vp9-decoder
v1.12.0-linphone
v1.6.1_linphone
v1.7.0-linphone
v1.9.0-linphone
vp9-preview
v1.9.0
v1.9.0-rc1
v1.8.2
v1.8.1
v1.8.0
v1.7.0
v1.6.1
v1.6.0
v1.5.0
v1.4.0
v1.3.0
v1.2.0
v1.1.0
v1.0.0
v0.9.7
v0.9.7-p1
v0.9.6
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
vp8/encoder/x86/quantize_ssse3.asm
+114
-0
vp8/encoder/x86/quantize_ssse3.asm
vp8/encoder/x86/x86_csystemdependent.c
+22
-0
vp8/encoder/x86/x86_csystemdependent.c
vp8/vp8cx.mk
+1
-0
vp8/vp8cx.mk
with
137 additions
and
0 deletions
vp8/encoder/x86/quantize_ssse3.asm
0 → 100755
+
114
−
0
View file @
b8f43aec
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------
; int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
;                                    short *qcoeff_ptr, short *dequant_ptr,
;                                    short *round_ptr,
;                                    short *quant_ptr, short *dqcoeff_ptr);
;
; Quantizes one block of 16 signed 16-bit coefficients in two 8-lane halves.
; Per lane:
;     sz      = z >> 15                       (0 or -1, sign of z)
;     y       = ((|z| + round) * quant) >> 16 (signed high half, pmulhw)
;     qcoeff  = (y ^ sz) - sz                 (sign of z re-applied)
;     dqcoeff = low 16 bits of qcoeff * dequant
;
; Out:  eax = eob: 1 + the highest position, in zz_shuf order, whose
;             qcoeff is non-zero; 0 when every qcoeff is zero.
;
; Every pointer is accessed with movdqa at offsets 0 and 16, so each buffer
; must be 16-byte aligned and at least 32 bytes long.
;
; Registers written: rax, rcx, rdx, rsi, rdi, xmm0-xmm5, flags.
; rsi and rdi are saved and restored here.  rbp is saved here; the argument
; and GOT handling is done by macros that are presumably supplied by
; vpx_ports/x86_abi_support.asm (not visible in this file).
;-----------------------------------------------------------------------
global sym(vp8_fast_quantize_b_impl_ssse3)
sym(vp8_fast_quantize_b_impl_ssse3):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdx,        arg(0)          ; coeff_ptr
    mov         rdi,        arg(3)          ; round_ptr
    mov         rsi,        arg(4)          ; quant_ptr

    movdqa      xmm0,       [rdx]           ; z, coefficients 0..7
    movdqa      xmm4,       [rdx + 16]      ; z, coefficients 8..15

    movdqa      xmm2,       [rdi]           ; round lo
    movdqa      xmm3,       [rdi + 16]      ; round hi

    movdqa      xmm1,       xmm0            ; working copies; xmm0/xmm4
    movdqa      xmm5,       xmm4            ; are turned into sign masks

    psraw       xmm0,       15              ; sign of z (aka sz)
    psraw       xmm4,       15              ; sign of z (aka sz)

    pabsw       xmm1,       xmm1            ; x = abs(z)
    pabsw       xmm5,       xmm5

    paddw       xmm1,       xmm2            ; x += round
    paddw       xmm5,       xmm3

    pmulhw      xmm1,       [rsi]           ; y = (x * quant) >> 16
    pmulhw      xmm5,       [rsi + 16]

    ; round_ptr/quant_ptr are consumed; rdi/rsi are reused for the outputs.
    mov         rdi,        arg(1)          ; qcoeff_ptr
    mov         rcx,        arg(2)          ; dequant_ptr
    mov         rsi,        arg(5)          ; dqcoeff_ptr

    pxor        xmm1,       xmm0            ; (y ^ sz) - sz puts the sign
    pxor        xmm5,       xmm4            ; of z back onto y
    psubw       xmm1,       xmm0
    psubw       xmm5,       xmm4

    movdqa      [rdi],      xmm1            ; store qcoeff
    movdqa      [rdi + 16], xmm5

    movdqa      xmm2,       [rcx]           ; dequant lo
    movdqa      xmm3,       [rcx + 16]      ; dequant hi

    pxor        xmm4,       xmm4            ; all-zero compare operand
    pmullw      xmm2,       xmm1            ; dqcoeff = qcoeff * dequant
    pmullw      xmm3,       xmm5

    ; Build a 16-bit mask with one bit per coefficient.  pcmpeqw against
    ; zero marks the lanes whose qcoeff IS zero; the mask is inverted below
    ; so that set bits mean "non-zero".
    pcmpeqw     xmm1,       xmm4            ; 0xffff where qcoeff == 0
    pcmpeqw     xmm5,       xmm4            ; 0xffff where qcoeff == 0
    packsswb    xmm1,       xmm5            ; 16 words -> 16 bytes
    pshufb      xmm1,       [GLOBAL(zz_shuf)] ; reorder bytes by zz_shuf

    pmovmskb    edx,        xmm1            ; bit i = byte i of reordered mask

    ; Reference scalar search that the bsr sequence below replaces:
;    xor         ecx,        ecx
;    mov         eax,        -1
;find_eob_loop:
;    shr         edx,        1
;    jc          fq_skip
;    mov         eax,        ecx
;fq_skip:
;    inc         ecx
;    cmp         ecx,        16
;    jne         find_eob_loop

    xor         rdi,        rdi             ; edi = 0 for the sub below
    mov         eax,        -1
    xor         dx,         ax              ; flip the bits for bsr
    bsr         eax,        edx             ; index of highest non-zero coeff

    movdqa      [rsi],      xmm2            ; store dqcoeff
    movdqa      [rsi + 16], xmm3            ; store dqcoeff

    sub         edi,        edx             ; check for all zeros in bit mask
    sar         edi,        31              ; 0 or -1
    add         eax,        1               ; index -> count
    and         eax,        edi             ; if the bit mask was all zero,
                                            ; then eob = 0

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
SECTION_RODATA
align 16
; pshufb control mask used by vp8_fast_quantize_b_impl_ssse3: output byte i
; is taken from input byte zz_shuf[i], so bit i of the following pmovmskb
; result describes coefficient zz_shuf[i].  Kept 16-byte aligned because it
; is used as a memory operand of a non-VEX SSE instruction.
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
This diff is collapsed.
Click to expand it.
vp8/encoder/x86/x86_csystemdependent.c
+
22
−
0
View file @
b8f43aec
...
...
@@ -179,6 +179,25 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSSE3
/* Implemented in assembly; the return value is stored as the block's eob
 * by the wrapper below. */
int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
                                   short *qcoeff_ptr, short *dequant_ptr,
                                   short *round_ptr,
                                   short *quant_ptr, short *dqcoeff_ptr);

/* Adapts the (BLOCK *, BLOCKD *) quantizer interface to the flat pointer
 * list taken by the assembly routine: inputs come from both b and d,
 * results are written through d. */
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
{
    const int eob = vp8_fast_quantize_b_impl_ssse3(
                        b->coeff,    /* coeff_ptr   */
                        d->qcoeff,   /* qcoeff_ptr  */
                        d->dequant,  /* dequant_ptr */
                        b->round,    /* round_ptr   */
                        b->quant,    /* quant_ptr   */
                        d->dqcoeff   /* dqcoeff_ptr */
                    );

    d->eob = eob;
}
#endif
void
vp8_arch_x86_encoder_init
(
VP8_COMP
*
cpi
)
{
#if CONFIG_RUNTIME_CPU_DETECT
...
...
@@ -333,6 +352,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
cpi
->
rtcd
.
variance
.
sad16x16x3
=
vp8_sad16x16x3_ssse3
;
cpi
->
rtcd
.
variance
.
sad16x8x3
=
vp8_sad16x8x3_ssse3
;
cpi
->
rtcd
.
quantize
.
fastquantb
=
vp8_fast_quantize_b_ssse3
;
}
#endif
...
...
This diff is collapsed.
Click to expand it.
vp8/vp8cx.mk
+
1
−
0
View file @
b8f43aec
...
...
@@ -109,6 +109,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE3)
+=
encoder/x86/sad_sse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/sad_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/quantize_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSE4_1)
+=
encoder/x86/sad_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64)
+=
encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64)
+=
encoder/x86/encodeopt.asm
...
...
This diff is collapsed.
Click to expand it.
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment
Menu
Explore
Projects
Groups
Topics
Snippets