Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
libvpx
Commits
b9c1dcc5
Commit
b9c1dcc5
authored
Aug 22, 2017
by
Johann
Browse files
quantize ssse3: copy style from sse2
Change-Id: I53f8a160e640c674ea035fc112e207b6dca42598
parent
75752ab7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
77 additions
and
97 deletions
+77
-97
vpx_dsp/x86/quantize_ssse3.c
vpx_dsp/x86/quantize_ssse3.c
+77
-97
No files found.
vpx_dsp/x86/quantize_ssse3.c
View file @
b9c1dcc5
...
...
@@ -23,104 +23,88 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const
int16_t
*
dequant_ptr
,
uint16_t
*
eob_ptr
,
const
int16_t
*
scan_ptr
,
const
int16_t
*
iscan_ptr
)
{
const
__m128i
zero
=
_mm_setzero_si128
();
intptr_t
index
=
16
;
__m128i
zbin
,
round
,
quant
,
dequant
,
shift
;
__m128i
coeff0
,
coeff1
;
__m128i
eob
;
__m128i
zbin
;
__m128i
round
,
quant
,
dequant
,
shift
;
intptr_t
index
=
0
;
__m128i
qcoeff0
,
qcoeff1
;
__m128i
cmp_mask0
,
cmp_mask1
;
__m128i
qtmp0
,
qtmp1
;
__m128i
zero_coeff0
,
zero_coeff1
,
iscan0
,
iscan1
;
__m128i
eob
,
eob0
,
eob1
;
(
void
)
scan_ptr
;
(
void
)
skip_block
;
assert
(
!
skip_block
);
// Setup global values
{
const
__m128i
one
=
_mm_set1_epi16
(
1
);
zbin
=
_mm_load_si128
((
const
__m128i
*
)
zbin_ptr
);
// x86 has no "greater *or equal* comparison. Subtract 1 from zbin so
// it is a strict "greater" comparison.
zbin
=
_mm_sub_epi16
(
zbin
,
one
);
round
=
_mm_load_si128
((
const
__m128i
*
)
round_ptr
);
quant
=
_mm_load_si128
((
const
__m128i
*
)
quant_ptr
);
dequant
=
_mm_load_si128
((
const
__m128i
*
)
dequant_ptr
);
shift
=
_mm_load_si128
((
const
__m128i
*
)
quant_shift_ptr
);
}
{
__m128i
qcoeff0
,
qcoeff1
;
__m128i
qtmp0
,
qtmp1
;
__m128i
cmp_mask0
,
cmp_mask1
;
__m128i
zero_coeff0
,
zero_coeff1
;
__m128i
iscan0
,
iscan1
;
__m128i
eob1
;
// Do DC and first 15 AC
coeff0
=
load_tran_low
(
coeff_ptr
+
index
);
coeff1
=
load_tran_low
(
coeff_ptr
+
index
+
8
);
qcoeff0
=
_mm_abs_epi16
(
coeff0
);
qcoeff1
=
_mm_abs_epi16
(
coeff1
);
cmp_mask0
=
_mm_cmpgt_epi16
(
qcoeff0
,
zbin
);
// Overwrite DC component.
zbin
=
_mm_unpackhi_epi64
(
zbin
,
zbin
);
cmp_mask1
=
_mm_cmpgt_epi16
(
qcoeff1
,
zbin
);
qcoeff0
=
_mm_adds_epi16
(
qcoeff0
,
round
);
round
=
_mm_unpackhi_epi64
(
round
,
round
);
qcoeff1
=
_mm_adds_epi16
(
qcoeff1
,
round
);
qtmp0
=
_mm_mulhi_epi16
(
qcoeff0
,
quant
);
quant
=
_mm_unpackhi_epi64
(
quant
,
quant
);
qtmp1
=
_mm_mulhi_epi16
(
qcoeff1
,
quant
);
qtmp0
=
_mm_add_epi16
(
qtmp0
,
qcoeff0
);
qtmp1
=
_mm_add_epi16
(
qtmp1
,
qcoeff1
);
qcoeff0
=
_mm_mulhi_epi16
(
qtmp0
,
shift
);
shift
=
_mm_unpackhi_epi64
(
shift
,
shift
);
qcoeff1
=
_mm_mulhi_epi16
(
qtmp1
,
shift
);
// Reinsert signs
qcoeff0
=
_mm_sign_epi16
(
qcoeff0
,
coeff0
);
qcoeff1
=
_mm_sign_epi16
(
qcoeff1
,
coeff1
);
// Mask out zbin threshold coeffs
qcoeff0
=
_mm_and_si128
(
qcoeff0
,
cmp_mask0
);
qcoeff1
=
_mm_and_si128
(
qcoeff1
,
cmp_mask1
);
store_tran_low
(
qcoeff0
,
qcoeff_ptr
+
index
);
store_tran_low
(
qcoeff1
,
qcoeff_ptr
+
index
+
8
);
coeff0
=
_mm_mullo_epi16
(
qcoeff0
,
dequant
);
dequant
=
_mm_unpackhi_epi64
(
dequant
,
dequant
);
coeff1
=
_mm_mullo_epi16
(
qcoeff1
,
dequant
);
store_tran_low
(
coeff0
,
dqcoeff_ptr
+
index
);
store_tran_low
(
coeff1
,
dqcoeff_ptr
+
index
+
8
);
// Scan for eob
zero_coeff0
=
_mm_cmpeq_epi16
(
coeff0
,
zero
);
zero_coeff1
=
_mm_cmpeq_epi16
(
coeff1
,
zero
);
iscan0
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
+
index
));
iscan1
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
+
index
+
8
));
// Add one to convert from indices to counts
iscan0
=
_mm_sub_epi16
(
iscan0
,
cmp_mask0
);
iscan1
=
_mm_sub_epi16
(
iscan1
,
cmp_mask1
);
eob
=
_mm_andnot_si128
(
zero_coeff0
,
iscan0
);
eob1
=
_mm_andnot_si128
(
zero_coeff1
,
iscan1
);
eob
=
_mm_max_epi16
(
eob
,
eob1
);
}
index
+=
16
;
// AC only loop
// Setup global values.
zbin
=
_mm_load_si128
((
const
__m128i
*
)
zbin_ptr
);
// x86 has no "greater *or equal* comparison. Subtract 1 from zbin so
// it is a strict "greater" comparison.
zbin
=
_mm_sub_epi16
(
zbin
,
_mm_set1_epi16
(
1
));
round
=
_mm_load_si128
((
const
__m128i
*
)
round_ptr
);
quant
=
_mm_load_si128
((
const
__m128i
*
)
quant_ptr
);
dequant
=
_mm_load_si128
((
const
__m128i
*
)
dequant_ptr
);
shift
=
_mm_load_si128
((
const
__m128i
*
)
quant_shift_ptr
);
// Do DC and first 15 AC.
coeff0
=
load_tran_low
(
coeff_ptr
);
coeff1
=
load_tran_low
(
coeff_ptr
+
8
);
qcoeff0
=
_mm_abs_epi16
(
coeff0
);
qcoeff1
=
_mm_abs_epi16
(
coeff1
);
cmp_mask0
=
_mm_cmpgt_epi16
(
qcoeff0
,
zbin
);
zbin
=
_mm_unpackhi_epi64
(
zbin
,
zbin
);
// Switch DC to AC
cmp_mask1
=
_mm_cmpgt_epi16
(
qcoeff1
,
zbin
);
qcoeff0
=
_mm_adds_epi16
(
qcoeff0
,
round
);
round
=
_mm_unpackhi_epi64
(
round
,
round
);
qcoeff1
=
_mm_adds_epi16
(
qcoeff1
,
round
);
qtmp0
=
_mm_mulhi_epi16
(
qcoeff0
,
quant
);
quant
=
_mm_unpackhi_epi64
(
quant
,
quant
);
qtmp1
=
_mm_mulhi_epi16
(
qcoeff1
,
quant
);
qtmp0
=
_mm_add_epi16
(
qtmp0
,
qcoeff0
);
qtmp1
=
_mm_add_epi16
(
qtmp1
,
qcoeff1
);
qcoeff0
=
_mm_mulhi_epi16
(
qtmp0
,
shift
);
shift
=
_mm_unpackhi_epi64
(
shift
,
shift
);
qcoeff1
=
_mm_mulhi_epi16
(
qtmp1
,
shift
);
// Reinsert signs
qcoeff0
=
_mm_sign_epi16
(
qcoeff0
,
coeff0
);
qcoeff1
=
_mm_sign_epi16
(
qcoeff1
,
coeff1
);
// Mask out zbin threshold coeffs
qcoeff0
=
_mm_and_si128
(
qcoeff0
,
cmp_mask0
);
qcoeff1
=
_mm_and_si128
(
qcoeff1
,
cmp_mask1
);
store_tran_low
(
qcoeff0
,
qcoeff_ptr
);
store_tran_low
(
qcoeff1
,
qcoeff_ptr
+
8
);
coeff0
=
_mm_mullo_epi16
(
qcoeff0
,
dequant
);
dequant
=
_mm_unpackhi_epi64
(
dequant
,
dequant
);
coeff1
=
_mm_mullo_epi16
(
qcoeff1
,
dequant
);
store_tran_low
(
coeff0
,
dqcoeff_ptr
);
store_tran_low
(
coeff1
,
dqcoeff_ptr
+
8
);
// Scan for eob.
zero_coeff0
=
_mm_cmpeq_epi16
(
coeff0
,
zero
);
zero_coeff1
=
_mm_cmpeq_epi16
(
coeff1
,
zero
);
iscan0
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
));
iscan1
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
+
8
));
// Add one to convert from indices to counts
iscan0
=
_mm_sub_epi16
(
iscan0
,
cmp_mask0
);
iscan1
=
_mm_sub_epi16
(
iscan1
,
cmp_mask1
);
eob
=
_mm_andnot_si128
(
zero_coeff0
,
iscan0
);
eob1
=
_mm_andnot_si128
(
zero_coeff1
,
iscan1
);
eob
=
_mm_max_epi16
(
eob
,
eob1
);
// AC only loop.
while
(
index
<
n_coeffs
)
{
__m128i
qcoeff0
,
qcoeff1
;
__m128i
qtmp0
,
qtmp1
;
__m128i
cmp_mask0
,
cmp_mask1
;
__m128i
zero_coeff0
,
zero_coeff1
;
__m128i
iscan0
,
iscan1
;
__m128i
eob0
,
eob1
;
coeff0
=
load_tran_low
(
coeff_ptr
+
index
);
coeff1
=
load_tran_low
(
coeff_ptr
+
index
+
8
);
...
...
@@ -142,11 +126,9 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
qcoeff0
=
_mm_mulhi_epi16
(
qtmp0
,
shift
);
qcoeff1
=
_mm_mulhi_epi16
(
qtmp1
,
shift
);
// Reinsert signs
qcoeff0
=
_mm_sign_epi16
(
qcoeff0
,
coeff0
);
qcoeff1
=
_mm_sign_epi16
(
qcoeff1
,
coeff1
);
// Mask out zbin threshold coeffs
qcoeff0
=
_mm_and_si128
(
qcoeff0
,
cmp_mask0
);
qcoeff1
=
_mm_and_si128
(
qcoeff1
,
cmp_mask1
);
...
...
@@ -159,12 +141,10 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
store_tran_low
(
coeff0
,
dqcoeff_ptr
+
index
);
store_tran_low
(
coeff1
,
dqcoeff_ptr
+
index
+
8
);
// Scan for eob
zero_coeff0
=
_mm_cmpeq_epi16
(
coeff0
,
zero
);
zero_coeff1
=
_mm_cmpeq_epi16
(
coeff1
,
zero
);
iscan0
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
+
index
));
iscan1
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
+
index
+
8
));
// Add one to convert from indices to counts
iscan0
=
_mm_sub_epi16
(
iscan0
,
cmp_mask0
);
iscan1
=
_mm_sub_epi16
(
iscan1
,
cmp_mask1
);
eob0
=
_mm_andnot_si128
(
zero_coeff0
,
iscan0
);
...
...
@@ -175,7 +155,7 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
index
+=
16
;
}
// Accumulate
EOB
// Accumulate
eob.
{
__m128i
eob_shuffled
;
eob_shuffled
=
_mm_shuffle_epi32
(
eob
,
0xe
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment