Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
libvpx
Commits
b6fe63a5
Commit
b6fe63a5
authored
Jan 13, 2017
by
Jingning Han
Committed by
Gerrit Code Review
Jan 13, 2017
Browse files
Merge "Rework 8x8 transpose SSSE3 for avg computation"
parents
553e9e29
39fff1be
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
39 additions
and
30 deletions
+39
-30
vpx_dsp/x86/avg_ssse3_x86_64.asm
vpx_dsp/x86/avg_ssse3_x86_64.asm
+39
-30
No files found.
vpx_dsp/x86/avg_ssse3_x86_64.asm
View file @
b6fe63a5
...
...
@@ -12,38 +12,47 @@
%include "third_party/x86inc/x86inc.asm"
; This file provides an SSSE3 version of the Hadamard transform. Some
; of the macro definitions were originally derived from the FFmpeg project.
; The current version applies to x86 64-bit only.
SECTION
.text
%if ARCH_X86_64
; matrix transpose
%macro INTERLEAVE_2X 4
  ; One interleave step on a register pair.
  ;   %1 = unpack granularity suffix appended to punpckh/punpckl
  ;        (invoked in this file with wd, dq and qdq)
  ;   %2 = first source register number; overwritten with the low interleave
  ;   %3 = second source register number
  ;   %4 = register number that receives the high interleave
  ; NOTE(review): the three-operand punpckh form and SWAP are presumably
  ; supplied by the included x86inc.asm layer -- confirm there.

  ; High half first: m%2 must still hold its original value here, because
  ; the punpckl below overwrites it.
  punpckh%1   m%4, m%2, m%3
  punpckl%1   m%2, m%3

  ; Exchange %3 and %4 for the code that follows the macro.
  SWAP        %3, %4
%endmacro
; TRANSPOSE8X8, 9-parameter form, built from INTERLEAVE_2X.
;   %1..%8 = register numbers holding the eight rows
;   %9     = register number passed to every INTERLEAVE_2X call as its
;            fourth (high-interleave destination) argument
; Three passes of four INTERLEAVE_2X calls each: word (wd), then
; doubleword (dq), then quadword (qdq) granularity, followed by two SWAPs.
; A 10-parameter TRANSPOSE8X8 is defined separately; NASM selects between
; multi-line macros of the same name by argument count, so this definition
; must be closed with its own %endmacro.
%macro TRANSPOSE8X8 9
  ; pass 1: 16-bit interleave of adjacent register pairs
  INTERLEAVE_2X  wd,  %1, %2, %9
  INTERLEAVE_2X  wd,  %3, %4, %9
  INTERLEAVE_2X  wd,  %5, %6, %9
  INTERLEAVE_2X  wd,  %7, %8, %9

  ; pass 2: 32-bit interleave
  INTERLEAVE_2X  dq,  %1, %3, %9
  INTERLEAVE_2X  dq,  %2, %4, %9
  INTERLEAVE_2X  dq,  %5, %7, %9
  INTERLEAVE_2X  dq,  %6, %8, %9

  ; pass 3: 64-bit interleave
  INTERLEAVE_2X  qdq, %1, %5, %9
  INTERLEAVE_2X  qdq, %3, %7, %9
  INTERLEAVE_2X  qdq, %2, %6, %9
  INTERLEAVE_2X  qdq, %4, %8, %9

  ; final fix-up of the register order
  SWAP           %2, %5
  SWAP           %4, %7
%endmacro
%macro TRANSPOSE8X8 10
  ; 8x8 transpose done directly with punpck* in three stages
  ; (word, doubleword, quadword granularity).
  ;   %1..%8  = register numbers holding the eight input rows
  ;   %9, %10 = two extra register numbers; both are written in stage 1
  ;             before they are ever read, so they act as scratch
  ; NOTE(review): the three-operand punpck* forms and SWAP are presumably
  ; provided by the included x86inc.asm layer -- confirm there.

  ; stage 1
  ; rows %1..%4: low halves go to the scratch registers, high halves in place
  punpcklwd   m%9,  m%1, m%2
  punpcklwd   m%10, m%3, m%4
  punpckhwd   m%1,  m%2
  punpckhwd   m%3,  m%4

  ; rows %5..%8: %2 and %4 were consumed above and are reused as destinations
  punpcklwd   m%2,  m%5, m%6
  punpcklwd   m%4,  m%7, m%8
  punpckhwd   m%5,  m%6
  punpckhwd   m%7,  m%8

  ; stage 2
  punpckldq   m%6,  m%9, m%10
  punpckldq   m%8,  m%1, m%3
  punpckhdq   m%9,  m%10
  punpckhdq   m%1,  m%3

  punpckldq   m%10, m%2, m%4
  punpckldq   m%3,  m%5, m%7
  punpckhdq   m%2,  m%4
  punpckhdq   m%5,  m%7

  ; stage 3
  punpckhqdq  m%4,  m%9, m%2      ; out3
  punpcklqdq  m%9,  m%2           ; out2
  punpcklqdq  m%7,  m%1, m%5      ; out6
  punpckhqdq  m%1,  m%5           ; out7

  punpckhqdq  m%2,  m%6, m%10     ; out1
  punpcklqdq  m%6,  m%10          ; out0
  punpcklqdq  m%5,  m%8, m%3      ; out4
  punpckhqdq  m%8,  m%3           ; out5

  ; Before these swaps out0 is in %6, out2 in %9, out5 in %8 and out7 in %1.
  ; Assuming SWAP exchanges the two names, out0..out7 end up in %1..%8.
  SWAP        %6, %1
  SWAP        %3, %9
  SWAP        %8, %6
%endmacro
%macro HMD8_1D 0
...
...
@@ -88,7 +97,7 @@ SECTION .text
%endmacro
INIT_XMM
ss
se3
cglobal
hadamard_8x8
,
3
,
5
,
1
0
,
input
,
stride
,
output
cglobal
hadamard_8x8
,
3
,
5
,
1
1
,
input
,
stride
,
output
lea
r3
,
[
2
*
strideq
]
lea
r4
,
[
4
*
strideq
]
...
...
@@ -105,7 +114,7 @@ cglobal hadamard_8x8, 3, 5, 10, input, stride, output
mova
m7
,
[
inputq
+
r3
]
HMD8_1D
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
,
10
HMD8_1D
mova
[
outputq
+
0
],
m0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment