Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
libvpx
Commits
dfafd10e
Commit
dfafd10e
authored
Aug 23, 2017
by
Johann Koenig
Committed by
Gerrit Code Review
Aug 23, 2017
Browse files
Merge "quantize neon: round dqcoeff towards zero"
parents
e83d99d7
2a5aa98a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
29 deletions
+18
-29
vpx_dsp/arm/quantize_neon.c
vpx_dsp/arm/quantize_neon.c
+18
-29
No files found.
vpx_dsp/arm/quantize_neon.c
View file @
dfafd10e
...
...
@@ -11,6 +11,7 @@
#include <arm_neon.h>
#include <assert.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/mem_neon.h"
...
...
@@ -143,6 +144,10 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
}
// Returns, for each 32-bit lane of |a|, 1 when the lane is negative and 0
// otherwise. The lanes are reinterpreted as unsigned so that the right shift
// is logical: the sign bit lands in bit 0 and is not sign-extended.
static INLINE int32x4_t extract_sign_bit(int32x4_t a) {
  const uint32x4_t raw_bits = vreinterpretq_u32_s32(a);
  const uint32x4_t sign_bits = vshrq_n_u32(raw_bits, 31);
  return vreinterpretq_s32_u32(sign_bits);
}
// Main difference is that zbin values are halved before comparison and dqcoeff
// values are divided by 2. The halved zbin is rounded, whereas the halved
// dqcoeff is truncated towards zero.
void
vpx_quantize_b_32x32_neon
(
...
...
@@ -184,7 +189,7 @@ void vpx_quantize_b_32x32_neon(
// (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16
int16x8_t
qcoeff
=
vshrq_n_s16
(
vqdmulhq_s16
(
rounded
,
quant
),
1
);
int16x8_t
dqcoeff
;
int32x4_t
dqcoeff_0
,
dqcoeff_1
,
dqcoeff_0_sign
,
dqcoeff_1_sign
;
int32x4_t
dqcoeff_0
,
dqcoeff_1
;
qcoeff
=
vaddq_s16
(
qcoeff
,
rounded
);
...
...
@@ -209,21 +214,12 @@ void vpx_quantize_b_32x32_neon(
dqcoeff_0
=
vmull_s16
(
vget_low_s16
(
qcoeff
),
vget_low_s16
(
dequant
));
dqcoeff_1
=
vmull_s16
(
vget_high_s16
(
qcoeff
),
vget_high_s16
(
dequant
));
// The way the C shifts the values requires us to convert to positive before
// shifting or even narrowing, then put the sign back.
dqcoeff_0_sign
=
vshrq_n_s32
(
dqcoeff_0
,
31
);
dqcoeff_1_sign
=
vshrq_n_s32
(
dqcoeff_1
,
31
);
dqcoeff_0
=
vabsq_s32
(
dqcoeff_0
);
dqcoeff_1
=
vabsq_s32
(
dqcoeff_1
);
dqcoeff_0
=
vshrq_n_s32
(
dqcoeff_0
,
1
);
dqcoeff_1
=
vshrq_n_s32
(
dqcoeff_1
,
1
);
dqcoeff_0
=
veorq_s32
(
dqcoeff_0
,
dqcoeff_0_sign
);
dqcoeff_1
=
veorq_s32
(
dqcoeff_1
,
dqcoeff_1_sign
);
dqcoeff_0
=
vsubq_s32
(
dqcoeff_0
,
dqcoeff_0_sign
);
dqcoeff_1
=
vsubq_s32
(
dqcoeff_1
,
dqcoeff_1_sign
);
// Narrow *without saturation* because that's what the C does.
dqcoeff
=
vcombine_s16
(
vmovn_s32
(
dqcoeff_0
),
vmovn_s32
(
dqcoeff_1
));
// Add 1 if negative to round towards zero because the C uses division.
dqcoeff_0
=
vaddq_s32
(
dqcoeff_0
,
extract_sign_bit
(
dqcoeff_0
));
dqcoeff_1
=
vaddq_s32
(
dqcoeff_1
,
extract_sign_bit
(
dqcoeff_1
));
dqcoeff
=
vcombine_s16
(
vshrn_n_s32
(
dqcoeff_0
,
1
),
vshrn_n_s32
(
dqcoeff_1
,
1
));
store_s16q_to_tran_low
(
dqcoeff_ptr
,
dqcoeff
);
dqcoeff_ptr
+=
8
;
...
...
@@ -253,7 +249,7 @@ void vpx_quantize_b_32x32_neon(
// (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16
int16x8_t
qcoeff
=
vshrq_n_s16
(
vqdmulhq_s16
(
rounded
,
quant
),
1
);
int16x8_t
dqcoeff
;
int32x4_t
dqcoeff_0
,
dqcoeff_1
,
dqcoeff_0_sign
,
dqcoeff_1_sign
;
int32x4_t
dqcoeff_0
,
dqcoeff_1
;
qcoeff
=
vaddq_s16
(
qcoeff
,
rounded
);
...
...
@@ -279,18 +275,11 @@ void vpx_quantize_b_32x32_neon(
dqcoeff_0
=
vmull_s16
(
vget_low_s16
(
qcoeff
),
vget_low_s16
(
dequant
));
dqcoeff_1
=
vmull_s16
(
vget_high_s16
(
qcoeff
),
vget_high_s16
(
dequant
));
dqcoeff_0_sign
=
vshrq_n_s32
(
dqcoeff_0
,
31
);
dqcoeff_1_sign
=
vshrq_n_s32
(
dqcoeff_1
,
31
);
dqcoeff_0
=
vabsq_s32
(
dqcoeff_0
);
dqcoeff_1
=
vabsq_s32
(
dqcoeff_1
);
dqcoeff_0
=
vshrq_n_s32
(
dqcoeff_0
,
1
);
dqcoeff_1
=
vshrq_n_s32
(
dqcoeff_1
,
1
);
dqcoeff_0
=
veorq_s32
(
dqcoeff_0
,
dqcoeff_0_sign
);
dqcoeff_1
=
veorq_s32
(
dqcoeff_1
,
dqcoeff_1_sign
);
dqcoeff_0
=
vsubq_s32
(
dqcoeff_0
,
dqcoeff_0_sign
);
dqcoeff_1
=
vsubq_s32
(
dqcoeff_1
,
dqcoeff_1_sign
);
dqcoeff
=
vcombine_s16
(
vmovn_s32
(
dqcoeff_0
),
vmovn_s32
(
dqcoeff_1
));
dqcoeff_0
=
vaddq_s32
(
dqcoeff_0
,
extract_sign_bit
(
dqcoeff_0
));
dqcoeff_1
=
vaddq_s32
(
dqcoeff_1
,
extract_sign_bit
(
dqcoeff_1
));
dqcoeff
=
vcombine_s16
(
vshrn_n_s32
(
dqcoeff_0
,
1
),
vshrn_n_s32
(
dqcoeff_1
,
1
));
store_s16q_to_tran_low
(
dqcoeff_ptr
,
dqcoeff
);
dqcoeff_ptr
+=
8
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment