Commit 64653fa1 authored by Linfeng Zhang, committed by James Zern

Change cospi_{1...31}_64 from tran_high_t to tran_coef_t

The unnecessary upcast to (int) will be cleaned up later.

BUG=webm:1450

Change-Id: Ia234575206d5a74540526924b06ed3939322d063
parent f7b276c2
......@@ -169,8 +169,8 @@ static INLINE void butterfly_one_coeff(const int16x8_t a, const int16x8_t b,
// fdct_round_shift(a * c0 +/- b * c1)
static INLINE void butterfly_two_coeff(const int16x8_t a, const int16x8_t b,
const tran_high_t c0,
const tran_high_t c1, int16x8_t *add,
const tran_coef_t c0,
const tran_coef_t c1, int16x8_t *add,
int16x8_t *sub) {
const int32x4_t a0 = vmull_n_s16(vget_low_s16(a), c0);
const int32x4_t a1 = vmull_n_s16(vget_high_s16(a), c0);
......
......@@ -214,8 +214,8 @@ static INLINE void butterfly_one_coeff(const int16x8_t a, const int16x8_t b,
// fdct_round_shift(a * c0 +/- b * c1)
static INLINE void butterfly_two_coeff(const int16x8_t a, const int16x8_t b,
const tran_high_t constant0,
const tran_high_t constant1,
const tran_coef_t constant0,
const tran_coef_t constant1,
int16x8_t *add, int16x8_t *sub) {
const int32x4_t a0 = vmull_n_s16(vget_low_s16(a), constant0);
const int32x4_t a1 = vmull_n_s16(vget_high_s16(a), constant0);
......@@ -590,19 +590,14 @@ static INLINE void butterfly_one_coeff_s16_s32(
// Like butterfly_one_coeff, but with s32.
static INLINE void butterfly_one_coeff_s32(
const int32x4_t a_lo, const int32x4_t a_hi, const int32x4_t b_lo,
const int32x4_t b_hi, const tran_high_t constant, int32x4_t *add_lo,
const int32x4_t b_hi, const int32_t constant, int32x4_t *add_lo,
int32x4_t *add_hi, int32x4_t *sub_lo, int32x4_t *sub_hi) {
// TODO(johannkoenig): Strangely there is only a conversion warning on int64_t
// to int32_t (const tran_high_t (aka const long long)) but not for int64_t to
// int16_t. The constants fit in int16_t. Investigate using int16_t for the
// constants to avoid bouncing between types.
const int32_t constant_s32 = (int32_t)constant;
const int32x4_t a_lo_0 = vmulq_n_s32(a_lo, constant_s32);
const int32x4_t a_hi_0 = vmulq_n_s32(a_hi, constant_s32);
const int32x4_t sum0 = vmlaq_n_s32(a_lo_0, b_lo, constant_s32);
const int32x4_t sum1 = vmlaq_n_s32(a_hi_0, b_hi, constant_s32);
const int32x4_t diff0 = vmlsq_n_s32(a_lo_0, b_lo, constant_s32);
const int32x4_t diff1 = vmlsq_n_s32(a_hi_0, b_hi, constant_s32);
const int32x4_t a_lo_0 = vmulq_n_s32(a_lo, constant);
const int32x4_t a_hi_0 = vmulq_n_s32(a_hi, constant);
const int32x4_t sum0 = vmlaq_n_s32(a_lo_0, b_lo, constant);
const int32x4_t sum1 = vmlaq_n_s32(a_hi_0, b_hi, constant);
const int32x4_t diff0 = vmlsq_n_s32(a_lo_0, b_lo, constant);
const int32x4_t diff1 = vmlsq_n_s32(a_hi_0, b_hi, constant);
*add_lo = vrshrq_n_s32(sum0, DCT_CONST_BITS);
*add_hi = vrshrq_n_s32(sum1, DCT_CONST_BITS);
*sub_lo = vrshrq_n_s32(diff0, DCT_CONST_BITS);
......@@ -621,19 +616,17 @@ static INLINE void butterfly_one_coeff_s32(
// Like butterfly_two_coeff, but with s32.
static INLINE void butterfly_two_coeff_s32(
const int32x4_t a_lo, const int32x4_t a_hi, const int32x4_t b_lo,
const int32x4_t b_hi, const tran_high_t constant0,
const tran_high_t constant1, int32x4_t *add_lo, int32x4_t *add_hi,
int32x4_t *sub_lo, int32x4_t *sub_hi) {
const int32_t constant0_s32 = (int32_t)constant0;
const int32_t constant1_s32 = (int32_t)constant1;
const int32x4_t a0 = vmulq_n_s32(a_lo, constant0_s32);
const int32x4_t a1 = vmulq_n_s32(a_hi, constant0_s32);
const int32x4_t a2 = vmulq_n_s32(a_lo, constant1_s32);
const int32x4_t a3 = vmulq_n_s32(a_hi, constant1_s32);
const int32x4_t sum0 = vmlaq_n_s32(a2, b_lo, constant0_s32);
const int32x4_t sum1 = vmlaq_n_s32(a3, b_hi, constant0_s32);
const int32x4_t diff0 = vmlsq_n_s32(a0, b_lo, constant1_s32);
const int32x4_t diff1 = vmlsq_n_s32(a1, b_hi, constant1_s32);
const int32x4_t b_hi, const int32_t constant0, const int32_t constant1,
int32x4_t *add_lo, int32x4_t *add_hi, int32x4_t *sub_lo,
int32x4_t *sub_hi) {
const int32x4_t a0 = vmulq_n_s32(a_lo, constant0);
const int32x4_t a1 = vmulq_n_s32(a_hi, constant0);
const int32x4_t a2 = vmulq_n_s32(a_lo, constant1);
const int32x4_t a3 = vmulq_n_s32(a_hi, constant1);
const int32x4_t sum0 = vmlaq_n_s32(a2, b_lo, constant0);
const int32x4_t sum1 = vmlaq_n_s32(a3, b_hi, constant0);
const int32x4_t diff0 = vmlsq_n_s32(a0, b_lo, constant1);
const int32x4_t diff1 = vmlsq_n_s32(a1, b_hi, constant1);
*add_lo = vrshrq_n_s32(sum0, DCT_CONST_BITS);
*add_hi = vrshrq_n_s32(sum1, DCT_CONST_BITS);
*sub_lo = vrshrq_n_s32(diff0, DCT_CONST_BITS);
......
......@@ -1410,10 +1410,10 @@ static INLINE void highbd_idct16x16_1_add_neg_kernel(uint16_t **dest,
void vpx_highbd_idct16x16_1_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int bd) {
const tran_low_t out0 =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
const tran_low_t out1 =
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const tran_low_t out0 = HIGHBD_WRAPLOW(
dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
const tran_low_t out1 = HIGHBD_WRAPLOW(
dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
const int16x8_t dc = vdupq_n_s16(a1);
int i;
......
......@@ -61,10 +61,10 @@ static INLINE void highbd_idct32x32_1_add_neg_kernel(uint16_t **dest,
void vpx_highbd_idct32x32_1_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int bd) {
const tran_low_t out0 =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
const tran_low_t out1 =
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const tran_low_t out0 = HIGHBD_WRAPLOW(
dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
const tran_low_t out1 = HIGHBD_WRAPLOW(
dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
const int16x8_t dc = vdupq_n_s16(a1);
int i;
......
......@@ -54,10 +54,10 @@ static INLINE void highbd_idct4x4_1_add_kernel2(uint16_t **dest,
void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int bd) {
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
const tran_low_t out0 =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
const tran_low_t out1 =
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const tran_low_t out0 = HIGHBD_WRAPLOW(
dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
const tran_low_t out1 = HIGHBD_WRAPLOW(
dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4);
const int16x8_t dc = vdupq_n_s16(a1);
......
......@@ -38,10 +38,10 @@ static INLINE void highbd_idct8x8_1_add_neg_kernel(uint16_t **dest,
void vpx_highbd_idct8x8_1_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int bd) {
const tran_low_t out0 =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
const tran_low_t out1 =
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const tran_low_t out0 = HIGHBD_WRAPLOW(
dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
const tran_low_t out1 = HIGHBD_WRAPLOW(
dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5);
const int16x8_t dc = vdupq_n_s16(a1);
......
This diff is collapsed.
......@@ -25,37 +25,37 @@
// printf("static const int cospi_%d_64 = %.0f;\n", i,
// round(16384 * cos(i*M_PI/64)));
// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
static const tran_high_t cospi_1_64 = 16364;
static const tran_high_t cospi_2_64 = 16305;
static const tran_high_t cospi_3_64 = 16207;
static const tran_high_t cospi_4_64 = 16069;
static const tran_high_t cospi_5_64 = 15893;
static const tran_high_t cospi_6_64 = 15679;
static const tran_high_t cospi_7_64 = 15426;
static const tran_high_t cospi_8_64 = 15137;
static const tran_high_t cospi_9_64 = 14811;
static const tran_high_t cospi_10_64 = 14449;
static const tran_high_t cospi_11_64 = 14053;
static const tran_high_t cospi_12_64 = 13623;
static const tran_high_t cospi_13_64 = 13160;
static const tran_high_t cospi_14_64 = 12665;
static const tran_high_t cospi_15_64 = 12140;
static const tran_high_t cospi_16_64 = 11585;
static const tran_high_t cospi_17_64 = 11003;
static const tran_high_t cospi_18_64 = 10394;
static const tran_high_t cospi_19_64 = 9760;
static const tran_high_t cospi_20_64 = 9102;
static const tran_high_t cospi_21_64 = 8423;
static const tran_high_t cospi_22_64 = 7723;
static const tran_high_t cospi_23_64 = 7005;
static const tran_high_t cospi_24_64 = 6270;
static const tran_high_t cospi_25_64 = 5520;
static const tran_high_t cospi_26_64 = 4756;
static const tran_high_t cospi_27_64 = 3981;
static const tran_high_t cospi_28_64 = 3196;
static const tran_high_t cospi_29_64 = 2404;
static const tran_high_t cospi_30_64 = 1606;
static const tran_high_t cospi_31_64 = 804;
static const tran_coef_t cospi_1_64 = 16364;
static const tran_coef_t cospi_2_64 = 16305;
static const tran_coef_t cospi_3_64 = 16207;
static const tran_coef_t cospi_4_64 = 16069;
static const tran_coef_t cospi_5_64 = 15893;
static const tran_coef_t cospi_6_64 = 15679;
static const tran_coef_t cospi_7_64 = 15426;
static const tran_coef_t cospi_8_64 = 15137;
static const tran_coef_t cospi_9_64 = 14811;
static const tran_coef_t cospi_10_64 = 14449;
static const tran_coef_t cospi_11_64 = 14053;
static const tran_coef_t cospi_12_64 = 13623;
static const tran_coef_t cospi_13_64 = 13160;
static const tran_coef_t cospi_14_64 = 12665;
static const tran_coef_t cospi_15_64 = 12140;
static const tran_coef_t cospi_16_64 = 11585;
static const tran_coef_t cospi_17_64 = 11003;
static const tran_coef_t cospi_18_64 = 10394;
static const tran_coef_t cospi_19_64 = 9760;
static const tran_coef_t cospi_20_64 = 9102;
static const tran_coef_t cospi_21_64 = 8423;
static const tran_coef_t cospi_22_64 = 7723;
static const tran_coef_t cospi_23_64 = 7005;
static const tran_coef_t cospi_24_64 = 6270;
static const tran_coef_t cospi_25_64 = 5520;
static const tran_coef_t cospi_26_64 = 4756;
static const tran_coef_t cospi_27_64 = 3981;
static const tran_coef_t cospi_28_64 = 3196;
static const tran_coef_t cospi_29_64 = 2404;
static const tran_coef_t cospi_30_64 = 1606;
static const tran_coef_t cospi_31_64 = 804;
// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
static const tran_coef_t sinpi_1_9 = 5283;
......
......@@ -147,8 +147,10 @@ void vpx_highbd_idct4x4_1_add_sse2(const tran_low_t *input, uint16_t *dest,
tran_low_t out;
__m128i dc, d;
out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
out = HIGHBD_WRAPLOW(
dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
out =
HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 4);
dc = _mm_set1_epi16(a1);
......
......@@ -265,8 +265,10 @@ static INLINE void highbd_idct_1_add_kernel(const tran_low_t *input,
tran_low_t out;
__m128i dc, d;
out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
out = HIGHBD_WRAPLOW(
dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
out =
HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, (size == 8) ? 5 : 6);
dc = _mm_set1_epi16(a1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment