Skip to content
GitLab
Explore
Projects
Groups
Topics
Snippets
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
BC
public
external
libvpx
Commits
e05b92c0
Commit
e05b92c0
authored
10 years ago
by
Dmitry Kovalev
Committed by
Gerrit Code Review
10 years ago
Browse files
Options
Download
Plain Diff
Merge "Removing half-variance asm functions which are not used."
parents
528a5c28
94f5491c
v1.14.0-linphone
1.4.X
feature/update_to_v1.9.0-linphone
feature/uwp_nuget
frame_parallel
highbitdepth
indianrunnerduck
javanwhistlingduck
khakicampbell
linphone
linphone-android
linphone-old
longtailedduck
m49-2623
m52-2743
m54-2840
m56-2924
m66-3359
m68-3440
mandarinduck
nextgen
nextgenv2
playground
sandbox/Jingning/experimental
sandbox/Jingning/transcode
sandbox/Jingning/vpx
sandbox/aconverse@google.com/ansbench
sandbox/hkuang/frame_parallel
sandbox/hkuang@google.com/decode
sandbox/jimbankoski@google.com/proposed-aom
sandbox/jingning@google.com/decoder_test_suite
sandbox/jingning@google.com/experimental
sandbox/jzern@google.com/test
sandbox/wangch@google.com/vp9
sandbox/yaowu@google.com/mergeaom
v1.12.0-linphone
v1.6.1_linphone
v1.7.0-linphone
v1.9.0-linphone
v1.9.0
v1.9.0-rc1
v1.8.2
v1.8.1
v1.8.0
v1.7.0
v1.6.1
v1.6.0
v1.5.0
v1.4.0
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
+0
-337
vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
vp9/encoder/x86/vp9_variance_impl_sse2.asm
+0
-333
vp9/encoder/x86/vp9_variance_impl_sse2.asm
vp9/encoder/x86/vp9_variance_sse2.c
+0
-60
vp9/encoder/x86/vp9_variance_sse2.c
vp9/vp9cx.mk
+0
-1
vp9/vp9cx.mk
with
0 additions
and
731 deletions
vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
deleted
100644 → 0
+
0
−
337
View file @
528a5c28
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------
; void vp9_half_horiz_vert_variance16x_h_sse2
; (
;     unsigned char *ref_ptr,               arg(0)
;     int            ref_pixels_per_line,   arg(1)
;     unsigned char *src_ptr,               arg(2)
;     int            src_pixels_per_line,   arg(3)
;     unsigned int   Height,                arg(4)
;     int           *sum,                   arg(5)
;     unsigned int  *sumsquared             arg(6)
; )
;
; 16-pixel-wide block, Height rows.  Every reference row is averaged
; with itself shifted by one byte (pavgb), and each pair of consecutive
; averaged rows is averaged again.  That prediction is subtracted from
; the matching src row; the differences are summed into *sum and their
; squares into *sumsquared.
;
; Reads Height + 1 reference rows, 17 bytes from each.  The row loop is
; bottom-tested, so Height is expected to be non-zero.
;
; Register roles inside the loop:
;   rsi / rdi  current ref / src row      rax / rdx  ref / src stride
;   rcx        rows still to do           xmm0       zero (unpack helper)
;   xmm5       previous averaged row      xmm6       8 x 16-bit diff sums
;   xmm7       4 x 32-bit squared sums
;-----------------------------------------------------------------------
global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE
sym(vp9_half_horiz_vert_variance16x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; ref_ptr
    mov         rdi, arg(2)                 ; src_ptr
    movsxd      rcx, dword ptr arg(4)       ; Height
    movsxd      rax, dword ptr arg(1)       ; ref_pixels_per_line
    movsxd      rdx, dword ptr arg(3)       ; src_pixels_per_line

    pxor        xmm6, xmm6                  ; difference accumulator
    pxor        xmm7, xmm7                  ; squared-difference accumulator
    pxor        xmm0, xmm0                  ; constant zero

    ; prime xmm5 with the horizontally averaged first row
    movdqu      xmm5, XMMWORD PTR [rsi]
    movdqu      xmm3, XMMWORD PTR [rsi+1]
    pavgb       xmm5, xmm3
    lea         rsi, [rsi + rax]

.next_row:
    movdqu      xmm1, XMMWORD PTR [rsi]
    movdqu      xmm2, XMMWORD PTR [rsi+1]
    pavgb       xmm1, xmm2                  ; xmm1 = horizontal average of this row
    pavgb       xmm5, xmm1                  ; xmm5 = average of previous and this row

    movdqa      xmm4, xmm5
    punpcklbw   xmm5, xmm0                  ; pixels 0..7  as words
    punpckhbw   xmm4, xmm0                  ; pixels 8..15 as words

    movq        xmm3, QWORD PTR [rdi]       ; src pixels 0..7
    punpcklbw   xmm3, xmm0
    psubw       xmm5, xmm3                  ; prediction - src

    movq        xmm3, QWORD PTR [rdi+8]     ; src pixels 8..15
    punpcklbw   xmm3, xmm0
    psubw       xmm4, xmm3

    paddw       xmm6, xmm5                  ; accumulate differences per column
    paddw       xmm6, xmm4
    pmaddwd     xmm5, xmm5                  ; square and add adjacent pairs
    pmaddwd     xmm4, xmm4
    paddd       xmm7, xmm5                  ; accumulate squared differences
    paddd       xmm7, xmm4

    movdqa      xmm5, xmm1                  ; this row becomes "previous" next time

    lea         rsi, [rsi + rax]
    lea         rdi, [rdi + rdx]

    sub         rcx, 1
    jnz         .next_row

    ; ---- fold both accumulators down to a single dword each ----
    pxor        xmm1, xmm1
    pxor        xmm5, xmm5                  ; zero for the dword unpacks

    ; difference sum: widen the eight signed words to dwords, then add
    punpcklwd   xmm0, xmm6                  ; xmm0/xmm1 are zero, so each word
    punpckhwd   xmm1, xmm6                  ;   lands in the top half of a dword
    psrad       xmm0, 16                    ; arithmetic shift sign-extends it
    psrad       xmm1, 16
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    punpckldq   xmm0, xmm5
    punpckhdq   xmm1, xmm5
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8
    paddd       xmm0, xmm1                  ; low dword = sum of differences

    ; squared-difference sum: add the four dwords together
    movdqa      xmm6, xmm7
    punpckldq   xmm6, xmm5
    punpckhdq   xmm7, xmm5
    paddd       xmm6, xmm7
    movdqa      xmm7, xmm6
    psrldq      xmm7, 8
    paddd       xmm6, xmm7                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        [rsi], xmm0
    movd        [rdi], xmm6

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;-----------------------------------------------------------------------
; void vp9_half_vert_variance16x_h_sse2
; (
;     unsigned char *ref_ptr,               arg(0)
;     int            ref_pixels_per_line,   arg(1)
;     unsigned char *src_ptr,               arg(2)
;     int            src_pixels_per_line,   arg(3)
;     unsigned int   Height,                arg(4)
;     int           *sum,                   arg(5)
;     unsigned int  *sumsquared             arg(6)
; )
;
; 16-pixel-wide block, Height rows.  Each reference row is averaged
; (pavgb) with the row below it; the result is subtracted from the
; matching src row.  The differences are summed into *sum and their
; squares into *sumsquared.
;
; Reads Height + 1 reference rows of 16 bytes.  The row loop is
; bottom-tested, so Height is expected to be non-zero.
;
; Register roles inside the loop:
;   rsi / rdi  current ref / src row      rax / rdx  ref / src stride
;   rcx        rows still to do           xmm0       zero (unpack helper)
;   xmm5       previous reference row     xmm6       8 x 16-bit diff sums
;   xmm7       4 x 32-bit squared sums
;-----------------------------------------------------------------------
global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE
sym(vp9_half_vert_variance16x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; ref_ptr
    mov         rdi, arg(2)                 ; src_ptr
    movsxd      rcx, dword ptr arg(4)       ; Height
    movsxd      rax, dword ptr arg(1)       ; ref_pixels_per_line
    movsxd      rdx, dword ptr arg(3)       ; src_pixels_per_line

    pxor        xmm6, xmm6                  ; difference accumulator
    pxor        xmm7, xmm7                  ; squared-difference accumulator

    movdqu      xmm5, XMMWORD PTR [rsi]     ; first reference row
    lea         rsi, [rsi + rax]
    pxor        xmm0, xmm0                  ; constant zero

.next_row:
    movdqu      xmm3, XMMWORD PTR [rsi]     ; row below
    pavgb       xmm5, xmm3                  ; xmm5 = average of the two rows

    movdqa      xmm4, xmm5
    punpcklbw   xmm5, xmm0                  ; pixels 0..7  as words
    punpckhbw   xmm4, xmm0                  ; pixels 8..15 as words

    movq        xmm2, QWORD PTR [rdi]       ; src pixels 0..7
    punpcklbw   xmm2, xmm0
    psubw       xmm5, xmm2                  ; prediction - src

    movq        xmm2, QWORD PTR [rdi+8]     ; src pixels 8..15
    punpcklbw   xmm2, xmm0
    psubw       xmm4, xmm2

    paddw       xmm6, xmm5                  ; accumulate differences per column
    paddw       xmm6, xmm4
    pmaddwd     xmm5, xmm5                  ; square and add adjacent pairs
    pmaddwd     xmm4, xmm4
    paddd       xmm7, xmm5                  ; accumulate squared differences
    paddd       xmm7, xmm4

    movdqa      xmm5, xmm3                  ; row below becomes "previous"

    lea         rsi, [rsi + rax]
    lea         rdi, [rdi + rdx]

    sub         rcx, 1
    jnz         .next_row

    ; ---- fold both accumulators down to a single dword each ----
    pxor        xmm1, xmm1
    pxor        xmm5, xmm5                  ; zero for the dword unpacks

    ; difference sum: widen the eight signed words to dwords, then add
    punpcklwd   xmm0, xmm6                  ; xmm0/xmm1 are zero, so each word
    punpckhwd   xmm1, xmm6                  ;   lands in the top half of a dword
    psrad       xmm0, 16                    ; arithmetic shift sign-extends it
    psrad       xmm1, 16
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    punpckldq   xmm0, xmm5
    punpckhdq   xmm1, xmm5
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8
    paddd       xmm0, xmm1                  ; low dword = sum of differences

    ; squared-difference sum: add the four dwords together
    movdqa      xmm6, xmm7
    punpckldq   xmm6, xmm5
    punpckhdq   xmm7, xmm5
    paddd       xmm6, xmm7
    movdqa      xmm7, xmm6
    psrldq      xmm7, 8
    paddd       xmm6, xmm7                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        [rsi], xmm0
    movd        [rdi], xmm6

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;-----------------------------------------------------------------------
; void vp9_half_horiz_variance16x_h_sse2
; (
;     unsigned char *ref_ptr,               arg(0)
;     int            ref_pixels_per_line,   arg(1)
;     unsigned char *src_ptr,               arg(2)
;     int            src_pixels_per_line,   arg(3)
;     unsigned int   Height,                arg(4)
;     int           *sum,                   arg(5)
;     unsigned int  *sumsquared             arg(6)
; )
;
; 16-pixel-wide block, Height rows.  Each reference row is averaged
; (pavgb) with itself shifted by one byte; the result is subtracted from
; the matching src row.  The differences are summed into *sum and their
; squares into *sumsquared.
;
; Reads 17 bytes from each of Height reference rows.  The row loop is
; bottom-tested, so Height is expected to be non-zero.
;
; Register roles inside the loop:
;   rsi / rdi  current ref / src row      rax / rdx  ref / src stride
;   rcx        rows still to do           xmm0       zero (unpack helper)
;   xmm6       8 x 16-bit diff sums       xmm7       4 x 32-bit squared sums
;-----------------------------------------------------------------------
global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE
sym(vp9_half_horiz_variance16x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; ref_ptr
    mov         rdi, arg(2)                 ; src_ptr
    movsxd      rcx, dword ptr arg(4)       ; Height
    movsxd      rax, dword ptr arg(1)       ; ref_pixels_per_line
    movsxd      rdx, dword ptr arg(3)       ; src_pixels_per_line

    pxor        xmm6, xmm6                  ; difference accumulator
    pxor        xmm7, xmm7                  ; squared-difference accumulator
    pxor        xmm0, xmm0                  ; constant zero

.next_row:
    movdqu      xmm5, XMMWORD PTR [rsi]     ; ref bytes 0..15
    movdqu      xmm3, XMMWORD PTR [rsi+1]   ; ref bytes 1..16
    pavgb       xmm5, xmm3                  ; xmm5 = horizontal average

    movdqa      xmm1, xmm5
    punpcklbw   xmm5, xmm0                  ; pixels 0..7  as words
    punpckhbw   xmm1, xmm0                  ; pixels 8..15 as words

    movq        xmm3, QWORD PTR [rdi]       ; src pixels 0..7
    punpcklbw   xmm3, xmm0
    movq        xmm2, QWORD PTR [rdi+8]     ; src pixels 8..15
    punpcklbw   xmm2, xmm0

    psubw       xmm5, xmm3                  ; prediction - src
    psubw       xmm1, xmm2

    paddw       xmm6, xmm5                  ; accumulate differences per column
    paddw       xmm6, xmm1
    pmaddwd     xmm5, xmm5                  ; square and add adjacent pairs
    pmaddwd     xmm1, xmm1
    paddd       xmm7, xmm5                  ; accumulate squared differences
    paddd       xmm7, xmm1

    lea         rsi, [rsi + rax]
    lea         rdi, [rdi + rdx]

    sub         rcx, 1
    jnz         .next_row

    ; ---- fold both accumulators down to a single dword each ----
    pxor        xmm1, xmm1
    pxor        xmm5, xmm5                  ; zero for the dword unpacks

    ; difference sum: widen the eight signed words to dwords, then add
    punpcklwd   xmm0, xmm6                  ; xmm0/xmm1 are zero, so each word
    punpckhwd   xmm1, xmm6                  ;   lands in the top half of a dword
    psrad       xmm0, 16                    ; arithmetic shift sign-extends it
    psrad       xmm1, 16
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    punpckldq   xmm0, xmm5
    punpckhdq   xmm1, xmm5
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8
    paddd       xmm0, xmm1                  ; low dword = sum of differences

    ; squared-difference sum: add the four dwords together
    movdqa      xmm6, xmm7
    punpckldq   xmm6, xmm5
    punpckhdq   xmm7, xmm5
    paddd       xmm6, xmm7
    movdqa      xmm7, xmm6
    psrldq      xmm7, 8
    paddd       xmm6, xmm7                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        [rsi], xmm0
    movd        [rdi], xmm6

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
This diff is collapsed.
Click to expand it.
vp9/encoder/x86/vp9_variance_impl_sse2.asm
+
0
−
333
View file @
e05b92c0
...
@@ -398,337 +398,4 @@ sym(vp9_get8x8var_sse2):
...
@@ -398,337 +398,4 @@ sym(vp9_get8x8var_sse2):
pop
rbp
pop
rbp
ret
ret
;-----------------------------------------------------------------------
; void vp9_half_horiz_vert_variance8x_h_sse2
; (
;     unsigned char *ref_ptr,               arg(0)
;     int            ref_pixels_per_line,   arg(1)
;     unsigned char *src_ptr,               arg(2)
;     int            src_pixels_per_line,   arg(3)
;     unsigned int   Height,                arg(4)
;     int           *sum,                   arg(5)
;     unsigned int  *sumsquared             arg(6)
; )
;
; 8-pixel-wide block, Height rows.  Every reference row is averaged
; with itself shifted by one byte (pavgb), and each pair of consecutive
; averaged rows is averaged again.  That prediction is subtracted from
; the matching src row; the differences are summed into *sum and their
; squares into *sumsquared.
;
; Reads Height + 1 reference rows, 9 bytes from each.  The row loop is
; bottom-tested, so Height is expected to be non-zero.
;
; The final reduction stays in XMM registers, so the routine never
; touches the MMX/x87 state and needs no emms.  Each 16-bit column sum
; is sign-extended before the columns are added together.
;
; Register roles inside the loop:
;   rsi / rdi  current ref / src row      rcx   rows still to do
;   r8 / r9    ref / src stride (64-bit builds only; 32-bit builds
;              re-read the strides from the argument slots)
;   xmm0       zero (unpack helper)       xmm5  previous averaged row
;   xmm6       8 x 16-bit diff sums       xmm7  4 x 32-bit squared sums
;-----------------------------------------------------------------------
global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE
sym(vp9_half_horiz_vert_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

%if ABI_IS_32BIT=0
    movsxd      r8, dword ptr arg(1)        ; ref_pixels_per_line
    movsxd      r9, dword ptr arg(3)        ; src_pixels_per_line
%endif

    pxor        xmm6, xmm6                  ; difference accumulator
    pxor        xmm7, xmm7                  ; squared-difference accumulator
    mov         rsi, arg(0)                 ; ref_ptr
    mov         rdi, arg(2)                 ; src_ptr

    movsxd      rcx, dword ptr arg(4)       ; Height
    pxor        xmm0, xmm0                  ; constant zero

    ; prime xmm5 with the horizontally averaged first row
    movq        xmm5, QWORD PTR [rsi]       ; ref bytes 0..7
    movq        xmm3, QWORD PTR [rsi+1]     ; ref bytes 1..8
    pavgb       xmm5, xmm3

%if ABI_IS_32BIT
    add         rsi, dword ptr arg(1)       ; ref_pixels_per_line ; next ref row
%else
    add         rsi, r8
%endif

.next_row:
    movq        xmm1, QWORD PTR [rsi]
    movq        xmm2, QWORD PTR [rsi+1]
    pavgb       xmm1, xmm2                  ; xmm1 = horizontal average of this row
    pavgb       xmm5, xmm1                  ; xmm5 = average of previous and this row
    punpcklbw   xmm5, xmm0                  ; 8 pixels as words

    movq        xmm3, QWORD PTR [rdi]       ; src pixels 0..7
    punpcklbw   xmm3, xmm0

    psubw       xmm5, xmm3                  ; prediction - src
    paddw       xmm6, xmm5                  ; accumulate differences per column
    pmaddwd     xmm5, xmm5                  ; square and add adjacent pairs
    paddd       xmm7, xmm5                  ; accumulate squared differences

    movdqa      xmm5, xmm1                  ; this row becomes "previous" next time

%if ABI_IS_32BIT
    add         esi, dword ptr arg(1)       ; ref_pixels_per_line ; next ref row
    add         edi, dword ptr arg(3)       ; src_pixels_per_line ; next src row
%else
    add         rsi, r8
    add         rdi, r9
%endif

    sub         rcx, 1
    jnz         .next_row

    ; ---- fold both accumulators down to a single dword each ----
    pxor        xmm1, xmm1
    pxor        xmm5, xmm5                  ; zero for the dword unpacks

    ; difference sum: widen the eight signed words to dwords, then add
    punpcklwd   xmm0, xmm6                  ; xmm0/xmm1 are zero, so each word
    punpckhwd   xmm1, xmm6                  ;   lands in the top half of a dword
    psrad       xmm0, 16                    ; arithmetic shift sign-extends it
    psrad       xmm1, 16
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    punpckldq   xmm0, xmm5
    punpckhdq   xmm1, xmm5
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8
    paddd       xmm0, xmm1                  ; low dword = sum of differences

    ; squared-difference sum: add the four dwords together
    movdqa      xmm6, xmm7
    punpckldq   xmm6, xmm5
    punpckhdq   xmm7, xmm5
    paddd       xmm6, xmm7
    movdqa      xmm7, xmm6
    psrldq      xmm7, 8
    paddd       xmm6, xmm7                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        [rsi], xmm0
    movd        [rdi], xmm6

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;-----------------------------------------------------------------------
; void vp9_half_vert_variance8x_h_sse2
; (
;     unsigned char *ref_ptr,               arg(0)
;     int            ref_pixels_per_line,   arg(1)
;     unsigned char *src_ptr,               arg(2)
;     int            src_pixels_per_line,   arg(3)
;     unsigned int   Height,                arg(4)
;     int           *sum,                   arg(5)
;     unsigned int  *sumsquared             arg(6)
; )
;
; 8-pixel-wide block, Height rows.  Each reference row is averaged
; (pavgb) with the row one stride below it; the result is subtracted
; from the matching src row.  The differences are summed into *sum and
; their squares into *sumsquared.
;
; Reads Height + 1 reference rows of 8 bytes.  The row loop is
; bottom-tested, so Height is expected to be non-zero.
;
; The final reduction stays in XMM registers, so the routine never
; touches the MMX/x87 state and needs no emms.  Each 16-bit column sum
; is sign-extended before the columns are added together.
;
; Register roles inside the loop:
;   rsi / rdi  current ref / src row      rcx   rows still to do
;   rax        ref stride, used to address the row below
;   r8 / r9    ref / src stride for the pointer updates (64-bit builds
;              only; 32-bit builds re-read them from the argument slots)
;   xmm0       zero (unpack helper)
;   xmm6       8 x 16-bit diff sums       xmm7  4 x 32-bit squared sums
;-----------------------------------------------------------------------
global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE
sym(vp9_half_vert_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

%if ABI_IS_32BIT=0
    movsxd      r8, dword ptr arg(1)        ; ref_pixels_per_line
    movsxd      r9, dword ptr arg(3)        ; src_pixels_per_line
%endif

    pxor        xmm6, xmm6                  ; difference accumulator
    pxor        xmm7, xmm7                  ; squared-difference accumulator
    mov         rsi, arg(0)                 ; ref_ptr
    mov         rdi, arg(2)                 ; src_ptr

    movsxd      rcx, dword ptr arg(4)       ; Height
    movsxd      rax, dword ptr arg(1)       ; ref_pixels_per_line

    pxor        xmm0, xmm0                  ; constant zero

.next_row:
    movq        xmm5, QWORD PTR [rsi]       ; this ref row, bytes 0..7
    movq        xmm3, QWORD PTR [rsi + rax] ; ref row below, bytes 0..7
    pavgb       xmm5, xmm3                  ; xmm5 = vertical average
    punpcklbw   xmm5, xmm0                  ; 8 pixels as words

    movq        xmm3, QWORD PTR [rdi]       ; src pixels 0..7
    punpcklbw   xmm3, xmm0

    psubw       xmm5, xmm3                  ; prediction - src
    paddw       xmm6, xmm5                  ; accumulate differences per column
    pmaddwd     xmm5, xmm5                  ; square and add adjacent pairs
    paddd       xmm7, xmm5                  ; accumulate squared differences

%if ABI_IS_32BIT
    add         esi, dword ptr arg(1)       ; ref_pixels_per_line ; next ref row
    add         edi, dword ptr arg(3)       ; src_pixels_per_line ; next src row
%else
    add         rsi, r8
    add         rdi, r9
%endif

    sub         rcx, 1
    jnz         .next_row

    ; ---- fold both accumulators down to a single dword each ----
    pxor        xmm1, xmm1
    pxor        xmm5, xmm5                  ; zero for the dword unpacks

    ; difference sum: widen the eight signed words to dwords, then add
    punpcklwd   xmm0, xmm6                  ; xmm0/xmm1 are zero, so each word
    punpckhwd   xmm1, xmm6                  ;   lands in the top half of a dword
    psrad       xmm0, 16                    ; arithmetic shift sign-extends it
    psrad       xmm1, 16
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    punpckldq   xmm0, xmm5
    punpckhdq   xmm1, xmm5
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8
    paddd       xmm0, xmm1                  ; low dword = sum of differences

    ; squared-difference sum: add the four dwords together
    movdqa      xmm6, xmm7
    punpckldq   xmm6, xmm5
    punpckhdq   xmm7, xmm5
    paddd       xmm6, xmm7
    movdqa      xmm7, xmm6
    psrldq      xmm7, 8
    paddd       xmm6, xmm7                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        [rsi], xmm0
    movd        [rdi], xmm6

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;-----------------------------------------------------------------------
; void vp9_half_horiz_variance8x_h_sse2
; (
;     unsigned char *ref_ptr,               arg(0)
;     int            ref_pixels_per_line,   arg(1)
;     unsigned char *src_ptr,               arg(2)
;     int            src_pixels_per_line,   arg(3)
;     unsigned int   Height,                arg(4)
;     int           *sum,                   arg(5)
;     unsigned int  *sumsquared             arg(6)
; )
;
; 8-pixel-wide block, Height rows.  Each reference row is averaged
; (pavgb) with itself shifted by one byte; the result is subtracted from
; the matching src row.  The differences are summed into *sum and their
; squares into *sumsquared.
;
; Reads 9 bytes from each of Height reference rows.  The row loop is
; bottom-tested, so Height is expected to be non-zero.
;
; The final reduction stays in XMM registers, so the routine never
; touches the MMX/x87 state and needs no emms.  Each 16-bit column sum
; is sign-extended before the columns are added together.
;
; Register roles inside the loop:
;   rsi / rdi  current ref / src row      rcx   rows still to do
;   r8 / r9    ref / src stride (64-bit builds only; 32-bit builds
;              re-read the strides from the argument slots)
;   xmm0       zero (unpack helper)
;   xmm6       8 x 16-bit diff sums       xmm7  4 x 32-bit squared sums
;-----------------------------------------------------------------------
global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE
sym(vp9_half_horiz_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

%if ABI_IS_32BIT=0
    movsxd      r8, dword ptr arg(1)        ; ref_pixels_per_line
    movsxd      r9, dword ptr arg(3)        ; src_pixels_per_line
%endif

    pxor        xmm6, xmm6                  ; difference accumulator
    pxor        xmm7, xmm7                  ; squared-difference accumulator
    mov         rsi, arg(0)                 ; ref_ptr
    mov         rdi, arg(2)                 ; src_ptr

    movsxd      rcx, dword ptr arg(4)       ; Height

    pxor        xmm0, xmm0                  ; constant zero

.next_row:
    movq        xmm5, QWORD PTR [rsi]       ; ref bytes 0..7
    movq        xmm3, QWORD PTR [rsi+1]     ; ref bytes 1..8
    pavgb       xmm5, xmm3                  ; xmm5 = horizontal average
    punpcklbw   xmm5, xmm0                  ; 8 pixels as words

    movq        xmm3, QWORD PTR [rdi]       ; src pixels 0..7
    punpcklbw   xmm3, xmm0

    psubw       xmm5, xmm3                  ; prediction - src
    paddw       xmm6, xmm5                  ; accumulate differences per column
    pmaddwd     xmm5, xmm5                  ; square and add adjacent pairs
    paddd       xmm7, xmm5                  ; accumulate squared differences

%if ABI_IS_32BIT
    add         esi, dword ptr arg(1)       ; ref_pixels_per_line ; next ref row
    add         edi, dword ptr arg(3)       ; src_pixels_per_line ; next src row
%else
    add         rsi, r8
    add         rdi, r9
%endif

    sub         rcx, 1
    jnz         .next_row

    ; ---- fold both accumulators down to a single dword each ----
    pxor        xmm1, xmm1
    pxor        xmm5, xmm5                  ; zero for the dword unpacks

    ; difference sum: widen the eight signed words to dwords, then add
    punpcklwd   xmm0, xmm6                  ; xmm0/xmm1 are zero, so each word
    punpckhwd   xmm1, xmm6                  ;   lands in the top half of a dword
    psrad       xmm0, 16                    ; arithmetic shift sign-extends it
    psrad       xmm1, 16
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    punpckldq   xmm0, xmm5
    punpckhdq   xmm1, xmm5
    paddd       xmm0, xmm1
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8
    paddd       xmm0, xmm1                  ; low dword = sum of differences

    ; squared-difference sum: add the four dwords together
    movdqa      xmm6, xmm7
    punpckldq   xmm6, xmm5
    punpckhdq   xmm7, xmm5
    paddd       xmm6, xmm7
    movdqa      xmm7, xmm6
    psrldq      xmm7, 8
    paddd       xmm6, xmm7                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        [rsi], xmm0
    movd        [rdi], xmm6

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
This diff is collapsed.
Click to expand it.
vp9/encoder/x86/vp9_variance_sse2.c
+
0
−
60
View file @
e05b92c0
...
@@ -42,66 +42,6 @@ unsigned int vp9_get8x8var_sse2
...
@@ -42,66 +42,6 @@ unsigned int vp9_get8x8var_sse2
unsigned
int
*
SSE
,
unsigned
int
*
SSE
,
int
*
Sum
int
*
Sum
);
);
/*
 * Prototypes for the half-pel variance helpers.  Each one takes a
 * reference block and a source block (pointer plus stride), the number
 * of rows to process, and two output pointers that receive the sum of
 * differences and the sum of squared differences.
 */
void vp9_half_horiz_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           int *sum,
                                           unsigned int *sumsquared);

void vp9_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                            int ref_pixels_per_line,
                                            const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            unsigned int Height,
                                            int *sum,
                                            unsigned int *sumsquared);

void vp9_half_horiz_variance8x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);

void vp9_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                       int ref_pixels_per_line,
                                       const unsigned char *src_ptr,
                                       int src_pixels_per_line,
                                       unsigned int Height,
                                       int *sum,
                                       unsigned int *sumsquared);

void vp9_half_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int *sum,
                                     unsigned int *sumsquared);

void vp9_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);
typedef
unsigned
int
(
*
get_var_sse2
)
(
typedef
unsigned
int
(
*
get_var_sse2
)
(
const
unsigned
char
*
src_ptr
,
const
unsigned
char
*
src_ptr
,
...
...
This diff is collapsed.
Click to expand it.
vp9/vp9cx.mk
+
0
−
1
View file @
e05b92c0
...
@@ -96,7 +96,6 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
...
@@ -96,7 +96,6 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_sad4d_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_sad4d_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_subpel_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3)
+=
encoder/x86/vp9_sad_sse3.asm
VP9_CX_SRCS-$(HAVE_SSE3)
+=
encoder/x86/vp9_sad_sse3.asm
...
...
This diff is collapsed.
Click to expand it.
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment
Menu
Explore
Projects
Groups
Topics
Snippets