BC / public / external / libvpx · Commits

Commit a84029ad, authored 11 years ago by Yunqing Wang, committed by Gerrit Code Review 11 years ago

    Merge "AVX2 Variance Optimization"

Parents: af720818 357b6536
Showing 4 changed files with 378 additions and 6 deletions:

  vp9/common/vp9_rtcd_defs.sh                        +6   −6
  vp9/encoder/x86/vp9_variance_avx2.c                +157 −0
  vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c    +213 −0
  vp9/vp9cx.mk                                       +2   −0
vp9/common/vp9_rtcd_defs.sh  (+6 −6)

@@ -343,25 +343,25 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
 # variance
 prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x16 $sse2_x86inc
+specialize vp9_variance32x16 $sse2_x86inc $avx2_x86inc

 prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance16x32 $sse2_x86inc

 prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x32 $sse2_x86inc
+specialize vp9_variance64x32 $sse2_x86inc $avx2_x86inc

 prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance32x64 $sse2_x86inc

 prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x32 $sse2_x86inc
+specialize vp9_variance32x32 $sse2_x86inc $avx2_x86inc

 prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x64 $sse2_x86inc
+specialize vp9_variance64x64 $sse2_x86inc $avx2_x86inc

 prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x16 mmx $sse2_x86inc
+specialize vp9_variance16x16 mmx $sse2_x86inc $avx2_x86inc

 prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance16x8 mmx $sse2_x86inc

@@ -662,7 +662,7 @@ specialize vp9_sad4x4x4d sse
 #specialize vp9_sub_pixel_mse16x16 sse2 mmx

 prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
-specialize vp9_mse16x16 mmx $sse2_x86inc
+specialize vp9_mse16x16 mmx $sse2_x86inc $avx2_x86inc

 prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
 specialize vp9_mse8x16
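These prototype/specialize entries drive libvpx's run-time CPU dispatch (RTCD): the build turns each prototype into a function pointer and points it at the most capable specialization the host CPU supports, so adding $avx2_x86inc is what makes the new AVX2 routines reachable. A minimal sketch of that idea, using simplified stand-in declarations rather than the generated vp9_rtcd.h:

#include <stdint.h>

/* Sketch only: the real declarations and CPU-flag handling come from the
 * generated vp9_rtcd.h and vpx_ports; the AVX2 routine is the one added in
 * vp9/encoder/x86/vp9_variance_avx2.c below. */
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int src_stride,
                                 const uint8_t *ref_ptr, int ref_stride,
                                 unsigned int *sse);
unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int src_stride,
                                    const uint8_t *ref_ptr, int ref_stride,
                                    unsigned int *sse);

/* One function pointer per "prototype" line; "specialize" lists the
 * candidate implementations the setup code may select from. */
static unsigned int (*vp9_variance32x32)(const uint8_t *, int,
                                         const uint8_t *, int,
                                         unsigned int *);

static void setup_rtcd_sketch(int cpu_has_avx2) {
  vp9_variance32x32 = vp9_variance32x32_c;       /* portable fallback   */
  if (cpu_has_avx2)
    vp9_variance32x32 = vp9_variance32x32_avx2;  /* AVX2 specialization */
}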
vp9/encoder/x86/vp9_variance_avx2.c  (new file, mode 100644, +157 −0)
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "./vpx_config.h"

#include "vp9/encoder/vp9_variance.h"
#include "vp9/common/vp9_pragmas.h"
#include "vpx_ports/mem.h"

typedef void (*get_var_avx2)(const unsigned char *src_ptr, int source_stride,
                             const unsigned char *ref_ptr, int recon_stride,
                             unsigned int *SSE, int *Sum);

void vp9_get16x16var_avx2(const unsigned char *src_ptr, int source_stride,
                          const unsigned char *ref_ptr, int recon_stride,
                          unsigned int *SSE, int *Sum);

void vp9_get32x32var_avx2(const unsigned char *src_ptr, int source_stride,
                          const unsigned char *ref_ptr, int recon_stride,
                          unsigned int *SSE, int *Sum);

static void variance_avx2(const unsigned char *src_ptr, int source_stride,
                          const unsigned char *ref_ptr, int recon_stride,
                          int w, int h, unsigned int *sse, int *sum,
                          get_var_avx2 var_fn, int block_size) {
  unsigned int sse0;
  int sum0;
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += 16) {
    for (j = 0; j < w; j += block_size) {
      // processing 16 rows horizontally each call
      var_fn(src_ptr + source_stride * i + j, source_stride,
             ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0);
      *sse += sse0;
      *sum += sum0;
    }
  }
}

unsigned int vp9_variance16x16_avx2(const unsigned char *src_ptr,
                                    int source_stride,
                                    const unsigned char *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
                &var, &avg, vp9_get16x16var_avx2, 16);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 8));
}

unsigned int vp9_mse16x16_avx2(const unsigned char *src_ptr,
                               int source_stride,
                               const unsigned char *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int sse0;
  int sum0;

  vp9_get16x16var_avx2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sse0, &sum0);
  *sse = sse0;
  return sse0;
}

unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  // processing 32 elements vertically in parallel
  variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
                &var, &avg, vp9_get32x32var_avx2, 32);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 10));
}

unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  // processing 32 elements vertically in parallel
  variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
                &var, &avg, vp9_get32x32var_avx2, 32);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 9));
}

unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  // processing 32 elements vertically in parallel
  variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
                &var, &avg, vp9_get32x32var_avx2, 32);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 12));
}

unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  // processing 32 elements vertically in parallel
  variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
                &var, &avg, vp9_get32x32var_avx2, 32);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 11));
}
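The wrappers above all reduce to the same identity: variance = SSE - sum^2 / (w*h). Every block size here has a power-of-two pixel count, so the divide is the shift in each return statement (16x16 uses >> 8, 32x16 uses >> 9, 32x32 uses >> 10, 64x32 uses >> 11, 64x64 uses >> 12), and vp9_mse16x16_avx2 simply returns SSE without the correction term. A small illustrative helper, not part of the patch, makes the identity explicit:

#include <stdint.h>

/* Illustrative only: variance = SSE - sum*sum / N, with N = w*h a power of
 * two so the divide becomes a shift by log2(N). */
static unsigned int variance_from_sums(unsigned int sse, int sum,
                                       int log2_pixel_count) {
  return sse - (unsigned int)(((int64_t)sum * sum) >> log2_pixel_count);
}

For a 32x32 block, variance_from_sums(var, avg, 10) computes the same value as the (var - (((int64_t)avg * avg) >> 10)) expression in vp9_variance32x32_avx2.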
vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c  (new file, mode 100644, +213 −0)
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <immintrin.h>  // AVX2

void vp9_get16x16var_avx2(const unsigned char *src_ptr,
                          int source_stride,
                          const unsigned char *ref_ptr,
                          int recon_stride,
                          unsigned int *SSE,
                          int *Sum) {
  __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low;
  __m256i ref_expand_high, madd_low, madd_high;
  unsigned int i, src_2strides, ref_2strides;
  __m256i zero_reg = _mm256_set1_epi16(0);
  __m256i sum_ref_src = _mm256_set1_epi16(0);
  __m256i madd_ref_src = _mm256_set1_epi16(0);

  // processing two strides in a 256-bit register, halving the number of
  // loop iterations (compared to the sse2 code)
  src_2strides = source_stride << 1;
  ref_2strides = recon_stride << 1;
  for (i = 0; i < 8; i++) {
    src = _mm256_castsi128_si256(
              _mm_loadu_si128((__m128i const *)(src_ptr)));
    src = _mm256_inserti128_si256(src,
              _mm_loadu_si128((__m128i const *)(src_ptr + source_stride)), 1);

    ref = _mm256_castsi128_si256(
              _mm_loadu_si128((__m128i const *)(ref_ptr)));
    ref = _mm256_inserti128_si256(ref,
              _mm_loadu_si128((__m128i const *)(ref_ptr + recon_stride)), 1);

    // expanding to 16 bit each lane
    src_expand_low = _mm256_unpacklo_epi8(src, zero_reg);
    src_expand_high = _mm256_unpackhi_epi8(src, zero_reg);

    ref_expand_low = _mm256_unpacklo_epi8(ref, zero_reg);
    ref_expand_high = _mm256_unpackhi_epi8(ref, zero_reg);

    // src-ref
    src_expand_low = _mm256_sub_epi16(src_expand_low, ref_expand_low);
    src_expand_high = _mm256_sub_epi16(src_expand_high, ref_expand_high);

    // madd low (src - ref)
    madd_low = _mm256_madd_epi16(src_expand_low, src_expand_low);

    // add high to low
    src_expand_low = _mm256_add_epi16(src_expand_low, src_expand_high);

    // madd high (src - ref)
    madd_high = _mm256_madd_epi16(src_expand_high, src_expand_high);

    sum_ref_src = _mm256_add_epi16(sum_ref_src, src_expand_low);

    // add high to low
    madd_ref_src = _mm256_add_epi32(madd_ref_src,
                                    _mm256_add_epi32(madd_low, madd_high));

    src_ptr += src_2strides;
    ref_ptr += ref_2strides;
  }

  {
    __m128i sum_res, madd_res;
    __m128i expand_sum_low, expand_sum_high, expand_sum;
    __m128i expand_madd_low, expand_madd_high, expand_madd;
    __m128i ex_expand_sum_low, ex_expand_sum_high, ex_expand_sum;

    // extract the low lane and add it to the high lane
    sum_res = _mm_add_epi16(_mm256_castsi256_si128(sum_ref_src),
                            _mm256_extractf128_si256(sum_ref_src, 1));

    madd_res = _mm_add_epi32(_mm256_castsi256_si128(madd_ref_src),
                             _mm256_extractf128_si256(madd_ref_src, 1));

    // padding each 2 bytes with another 2 zeroed bytes
    expand_sum_low = _mm_unpacklo_epi16(_mm256_castsi256_si128(zero_reg),
                                        sum_res);
    expand_sum_high = _mm_unpackhi_epi16(_mm256_castsi256_si128(zero_reg),
                                         sum_res);

    // shifting the sign 16 bits right
    expand_sum_low = _mm_srai_epi32(expand_sum_low, 16);
    expand_sum_high = _mm_srai_epi32(expand_sum_high, 16);

    expand_sum = _mm_add_epi32(expand_sum_low, expand_sum_high);

    // expand each 32 bits of the madd result to 64 bits
    expand_madd_low = _mm_unpacklo_epi32(madd_res,
                                         _mm256_castsi256_si128(zero_reg));
    expand_madd_high = _mm_unpackhi_epi32(madd_res,
                                          _mm256_castsi256_si128(zero_reg));

    expand_madd = _mm_add_epi32(expand_madd_low, expand_madd_high);

    ex_expand_sum_low = _mm_unpacklo_epi32(expand_sum,
                                           _mm256_castsi256_si128(zero_reg));
    ex_expand_sum_high = _mm_unpackhi_epi32(expand_sum,
                                            _mm256_castsi256_si128(zero_reg));

    ex_expand_sum = _mm_add_epi32(ex_expand_sum_low, ex_expand_sum_high);

    // shift 8 bytes right
    madd_res = _mm_srli_si128(expand_madd, 8);
    sum_res = _mm_srli_si128(ex_expand_sum, 8);

    madd_res = _mm_add_epi32(madd_res, expand_madd);
    sum_res = _mm_add_epi32(sum_res, ex_expand_sum);

    *((int *)SSE) = _mm_cvtsi128_si32(madd_res);

    *((int *)Sum) = _mm_cvtsi128_si32(sum_res);
  }
}

void vp9_get32x32var_avx2(const unsigned char *src_ptr,
                          int source_stride,
                          const unsigned char *ref_ptr,
                          int recon_stride,
                          unsigned int *SSE,
                          int *Sum) {
  __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low;
  __m256i ref_expand_high, madd_low, madd_high;
  unsigned int i;
  __m256i zero_reg = _mm256_set1_epi16(0);
  __m256i sum_ref_src = _mm256_set1_epi16(0);
  __m256i madd_ref_src = _mm256_set1_epi16(0);

  // processing 32 elements in parallel
  for (i = 0; i < 16; i++) {
    src = _mm256_loadu_si256((__m256i const *)(src_ptr));
    ref = _mm256_loadu_si256((__m256i const *)(ref_ptr));

    // expanding to 16 bit each lane
    src_expand_low = _mm256_unpacklo_epi8(src, zero_reg);
    src_expand_high = _mm256_unpackhi_epi8(src, zero_reg);

    ref_expand_low = _mm256_unpacklo_epi8(ref, zero_reg);
    ref_expand_high = _mm256_unpackhi_epi8(ref, zero_reg);

    // src-ref
    src_expand_low = _mm256_sub_epi16(src_expand_low, ref_expand_low);
    src_expand_high = _mm256_sub_epi16(src_expand_high, ref_expand_high);

    // madd low (src - ref)
    madd_low = _mm256_madd_epi16(src_expand_low, src_expand_low);

    // add high to low
    src_expand_low = _mm256_add_epi16(src_expand_low, src_expand_high);

    // madd high (src - ref)
    madd_high = _mm256_madd_epi16(src_expand_high, src_expand_high);

    sum_ref_src = _mm256_add_epi16(sum_ref_src, src_expand_low);

    // add high to low
    madd_ref_src = _mm256_add_epi32(madd_ref_src,
                                    _mm256_add_epi32(madd_low, madd_high));

    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  {
    __m256i expand_sum_low, expand_sum_high, expand_sum;
    __m256i expand_madd_low, expand_madd_high, expand_madd;
    __m256i ex_expand_sum_low, ex_expand_sum_high, ex_expand_sum;

    // padding each 2 bytes with another 2 zeroed bytes
    expand_sum_low = _mm256_unpacklo_epi16(zero_reg, sum_ref_src);
    expand_sum_high = _mm256_unpackhi_epi16(zero_reg, sum_ref_src);

    // shifting the sign 16 bits right
    expand_sum_low = _mm256_srai_epi32(expand_sum_low, 16);
    expand_sum_high = _mm256_srai_epi32(expand_sum_high, 16);

    expand_sum = _mm256_add_epi32(expand_sum_low, expand_sum_high);

    // expand each 32 bits of the madd result to 64 bits
    expand_madd_low = _mm256_unpacklo_epi32(madd_ref_src, zero_reg);
    expand_madd_high = _mm256_unpackhi_epi32(madd_ref_src, zero_reg);

    expand_madd = _mm256_add_epi32(expand_madd_low, expand_madd_high);

    ex_expand_sum_low = _mm256_unpacklo_epi32(expand_sum, zero_reg);
    ex_expand_sum_high = _mm256_unpackhi_epi32(expand_sum, zero_reg);

    ex_expand_sum = _mm256_add_epi32(ex_expand_sum_low, ex_expand_sum_high);

    // shift 8 bytes right
    madd_ref_src = _mm256_srli_si256(expand_madd, 8);
    sum_ref_src = _mm256_srli_si256(ex_expand_sum, 8);

    madd_ref_src = _mm256_add_epi32(madd_ref_src, expand_madd);
    sum_ref_src = _mm256_add_epi32(sum_ref_src, ex_expand_sum);

    // extract the low lane and the high lane and add the results
    *((int *)SSE) =
        _mm_cvtsi128_si32(_mm256_castsi256_si128(madd_ref_src)) +
        _mm_cvtsi128_si32(_mm256_extractf128_si256(madd_ref_src, 1));

    *((int *)Sum) =
        _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_ref_src)) +
        _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_ref_src, 1));
  }
}
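vp9_get16x16var_avx2 and vp9_get32x32var_avx2 return two statistics per block: Sum, the signed sum of (src - ref), and SSE, the sum of (src - ref) squared. The 16x16 version packs two 16-pixel rows into each 256-bit register (so 8 iterations cover all 16 rows), widens to 16 bits, and squares-and-accumulates with _mm256_madd_epi16; the 32x32 version loads one full 32-pixel row per register instead. A scalar reference, purely illustrative and not part of the patch, shows the intended result:

#include <stdint.h>

/* Hypothetical scalar equivalent of vp9_get16x16var_avx2, for illustration:
 * same SSE/Sum definitions, no SIMD. */
static void get16x16var_ref(const unsigned char *src_ptr, int source_stride,
                            const unsigned char *ref_ptr, int recon_stride,
                            unsigned int *SSE, int *Sum) {
  unsigned int sse = 0;
  int sum = 0;
  int i, j;
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      const int diff = src_ptr[j] - ref_ptr[j];
      sum += diff;                         /* Sum of differences         */
      sse += (unsigned int)(diff * diff);  /* Sum of squared differences */
    }
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }
  *SSE = sse;
  *Sum = sum;
}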
vp9/vp9cx.mk  (+2 −0)
@@ -81,6 +81,7 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
@@ -91,6 +92,7 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
 endif