Commit e0cc52db, authored by clang-format, committed by James Zern
Browse files

vp9/encoder: apply clang-format

Change-Id: I45d9fb4013f50766b24363a86365e8063e8954c2
parent 3a04c9c9
......@@ -18,14 +18,13 @@
#include "vpx_dsp/txfm_common.h"
void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr,
int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr,
const int16_t* dequant_ptr, uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
int16_t temp_buffer[64];
(void)coeff_ptr;
......
......@@ -26,8 +26,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
(void)zbin_ptr;
......@@ -54,12 +54,12 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
vget_low_s16(v_quant));
const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
vget_high_s16(v_quant));
const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
vshrn_n_s32(v_tmp_hi, 16));
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
const int16x8_t v_tmp2 =
vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
......@@ -79,12 +79,12 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
vget_low_s16(v_quant));
const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
vget_high_s16(v_quant));
const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
vshrn_n_s32(v_tmp_hi, 16));
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
const int16x8_t v_tmp2 =
vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
......@@ -96,9 +96,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
}
{
const int16x4_t v_eobmax_3210 =
vmax_s16(vget_low_s16(v_eobmax_76543210),
vget_high_s16(v_eobmax_76543210));
const int16x4_t v_eobmax_3210 = vmax_s16(
vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210));
const int64x1_t v_eobmax_xx32 =
vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
const int16x4_t v_eobmax_tmp =
......
......@@ -11,74 +11,73 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
/*
 * BLOCK_ERROR_BLOCKSIZE_MSA(BSize) expands to a static function
 *   int64_t block_error_<BSize>size_msa(const int16_t *coeff_ptr,
 *                                       const int16_t *dq_coeff_ptr,
 *                                       int64_t *ssz);
 *
 * The generated function reads both arrays 16 elements per step (two vector
 * loads, at offsets 0 and 8). One step runs before the loop and
 * (BSize >> 4) - 1 steps run inside it, so BSize elements are consumed when
 * BSize is a multiple of 16. BSize must be at least 16: loop_cnt is unsigned,
 * so (BSize >> 4) - 1 would wrap around for smaller values.
 *
 * Two pairs of 64-bit vector accumulators are maintained:
 *   sq_coeff_r / sq_coeff_l - fed from the widened coeff values paired with
 *                             themselves; their total is stored to *ssz.
 *   err0 / err1             - fed from diff_r / diff_l, which are derived
 *                             from the interleaved coeff / dq_coeff values;
 *                             their total is the return value.
 * The first step uses DOTP_SW2_SD to initialise the accumulators; every later
 * step uses DPADD_SD2_SD to add into them.
 *
 * At the end, lane 1 of each accumulator is added onto lane 0 and lane 0 is
 * copied out, then the right/left halves are summed.
 *
 * NOTE(review): the helper macros (LD_SH, UNPCK_SH_SW, ILVRL_H2_SH,
 * HSUB_UH2_SW, DOTP_SW2_SD, DPADD_SD2_SD) are defined outside this file;
 * presumably diff_* holds the per-element coeff - dq_coeff difference, making
 * the results the sum of squared differences and the sum of squared
 * coefficients - confirm against macros_msa.h.
 */
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \
const int16_t *dq_coeff_ptr, \
int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \
sq_coeff_r, sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
/*
 * BLOCK_ERROR_BLOCKSIZE_MSA(BSize) expands to a static function
 *   int64_t block_error_<BSize>size_msa(const int16_t *coeff_ptr,
 *                                       const int16_t *dq_coeff_ptr,
 *                                       int64_t *ssz);
 *
 * The generated function reads both arrays 16 elements per step (two vector
 * loads, at offsets 0 and 8). One step runs before the loop and
 * (BSize >> 4) - 1 steps run inside it, so BSize elements are consumed when
 * BSize is a multiple of 16. BSize must be at least 16: loop_cnt is unsigned,
 * so (BSize >> 4) - 1 would wrap around for smaller values.
 *
 * Two pairs of 64-bit vector accumulators are maintained:
 *   sq_coeff_r / sq_coeff_l - fed from the widened coeff values paired with
 *                             themselves; their total is stored to *ssz.
 *   err0 / err1             - fed from diff_r / diff_l, which are derived
 *                             from the interleaved coeff / dq_coeff values;
 *                             their total is the return value.
 * The first step uses DOTP_SW2_SD to initialise the accumulators; every later
 * step uses DPADD_SD2_SD to add into them.
 *
 * At the end, lane 1 of each accumulator is added onto lane 0 and lane 0 is
 * copied out, then the right/left halves are summed.
 *
 * NOTE(review): the helper macros (LD_SH, UNPCK_SH_SW, ILVRL_H2_SH,
 * HSUB_UH2_SW, DOTP_SW2_SD, DPADD_SD2_SD) are defined outside this file;
 * presumably diff_* holds the per-element coeff - dq_coeff difference, making
 * the results the sum of squared differences and the sum of squared
 * coefficients - confirm against macros_msa.h.
 */
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa( \
const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
BLOCK_ERROR_BLOCKSIZE_MSA(16);
BLOCK_ERROR_BLOCKSIZE_MSA(64);
......@@ -86,25 +85,17 @@ BLOCK_ERROR_BLOCKSIZE_MSA(256);
BLOCK_ERROR_BLOCKSIZE_MSA(1024);
int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
const tran_low_t *dq_coeff_ptr,
intptr_t blk_size, int64_t *ssz) {
const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
int64_t *ssz) {
int64_t err;
const int16_t *coeff = (const int16_t *)coeff_ptr;
const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
switch (blk_size) {
case 16:
err = block_error_16size_msa(coeff, dq_coeff, ssz);
break;
case 64:
err = block_error_64size_msa(coeff, dq_coeff, ssz);
break;
case 256:
err = block_error_256size_msa(coeff, dq_coeff, ssz);
break;
case 1024:
err = block_error_1024size_msa(coeff, dq_coeff, ssz);
break;
case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
default:
err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
break;
......
......@@ -159,8 +159,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
......@@ -169,8 +169,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
......@@ -181,8 +181,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
input += 128;
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
......@@ -191,8 +191,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
......@@ -339,24 +339,24 @@ static void fadst16_transpose_msa(int16_t *input, int16_t *out) {
v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
/* load input data */
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
l4, l12, l5, l13, l6, l14, l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
out += 16 * 8;
/* load input data */
input += 128;
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
l4, l12, l5, l13, l6, l14, l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
}
......@@ -371,10 +371,10 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7);
temp = intermediate + 8;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
in8, in9, in10, in11, in12, in13, in14, in15);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
in10, in11, in12, in13, in14, in15);
FDCT_POSTPROC_2V_NEG_H(in0, in1);
FDCT_POSTPROC_2V_NEG_H(in2, in3);
FDCT_POSTPROC_2V_NEG_H(in4, in5);
......@@ -383,29 +383,28 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
FDCT_POSTPROC_2V_NEG_H(in10, in11);
FDCT_POSTPROC_2V_NEG_H(in12, in13);
FDCT_POSTPROC_2V_NEG_H(in14, in15);
BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
in8, in9, in10, in11, in12, in13, in14, in15,
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
in8, in9, in10, in11, in12, in13, in14, in15);
BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11,
in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6,
tmp7, in8, in9, in10, in11, in12, in13, in14, in15);
temp = intermediate;
ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16);
FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1,
tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
temp = intermediate;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3,
tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3);
FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3,
in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0,
tmp1, in1, tmp2, in2, tmp3, in3);
ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16);
TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7,
tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7);
TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4,
tmp5, in5, tmp6, in6, tmp7, in7);
out = output + 8;
ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16);
}
void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
int32_t stride, int32_t tx_type) {
void vp9_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride,
int32_t tx_type) {
DECLARE_ALIGNED(32, int16_t, tmp[256]);
DECLARE_ALIGNED(32, int16_t, trans_buf[256]);
DECLARE_ALIGNED(32, int16_t, tmp_buf[128]);
......@@ -413,35 +412,31 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
int16_t *ptmpbuf = &tmp_buf[0];
int16_t *trans = &trans_buf[0];
const int32_t const_arr[29 * 4] = {
52707308, 52707308, 52707308, 52707308,
-1072430300, -1072430300, -1072430300, -1072430300,
795618043, 795618043, 795618043, 795618043,
-721080468, -721080468, -721080468, -721080468,
459094491, 459094491, 459094491, 459094491,
-970646691, -970646691, -970646691, -970646691,
1010963856, 1010963856, 1010963856, 1010963856,
-361743294, -361743294, -361743294, -361743294,
209469125, 209469125, 209469125, 209469125,
-1053094788, -1053094788, -1053094788, -1053094788,
1053160324, 1053160324, 1053160324, 1053160324,
639644520, 639644520, 639644520, 639644520,
-862444000, -862444000, -862444000, -862444000,
1062144356, 1062144356, 1062144356, 1062144356,
-157532337, -157532337, -157532337, -157532337,
260914709, 260914709, 260914709, 260914709,
-1041559667, -1041559667, -1041559667, -1041559667,
920985831, 920985831, 920985831, 920985831,
-551995675, -551995675, -551995675, -551995675,
596522295, 596522295, 596522295, 596522295,
892853362, 892853362, 892853362, 892853362,
-892787826, -892787826, -892787826, -892787826,
410925857, 410925857, 410925857, 410925857,
-992012162, -992012162, -992012162, -992012162,
992077698, 992077698, 992077698, 992077698,
759246145, 759246145, 759246145, 759246145,
-759180609, -759180609, -759180609, -759180609,
-759222975, -759222975, -759222975, -759222975,
759288511, 759288511, 759288511, 759288511 };
52707308, 52707308, 52707308, 52707308, -1072430300,
-1072430300, -1072430300, -1072430300, 795618043, 795618043,
795618043, 795618043, -721080468, -721080468, -721080468,
-721080468, 459094491, 459094491, 459094491, 459094491,
-970646691, -970646691, -970646691, -970646691, 1010963856,
1010963856, 1010963856, 1010963856, -361743294, -361743294,
-361743294, -361743294, 209469125, 209469125, 209469125,
209469125, -1053094788, -1053094788, -1053094788, -1053094788,
1053160324, 1053160324, 1053160324, 1053160324, 639644520,
639644520, 639644520, 639644520, -862444000, -862444000,
-862444000, -862444000, 1062144356, 1062144356, 1062144356,
1062144356, -157532337, -157532337, -157532337, -157532337,
260914709, 260914709, 260914709, 260914709, -1041559667,
-1041559667, -1041559667, -1041559667, 920985831, 920985831,
920985831, 920985831, -551995675, -551995675, -551995675,
-551995675, 596522295, 596522295, 596522295, 596522295,
892853362, 892853362, 892853362, 892853362, -892787826,
-892787826, -892787826, -892787826, 410925857, 410925857,
410925857, 410925857, -992012162, -992012162, -992012162,
-992012162, 992077698, 992077698, 992077698, 992077698,
759246145, 759246145, 759246145, 759246145, -759180609,
-759180609, -759180609, -759180609, -759222975, -759222975,
-759222975, -759222975, 759288511, 759288511, 759288511,
759288511
};
switch (tx_type) {
case DCT_DCT:
......@@ -500,8 +495,6 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
fadst16_transpose_msa(tmp, output);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
......@@ -86,9 +86,7 @@ void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
break;
default:
assert(0);
break;
default: assert(0); break;
}
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
......
......@@ -23,44 +23,42 @@ void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
switch (tx_type) {
case DCT_DCT:
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case ADST_DCT:
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case DCT_ADST:
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case ADST_ADST:
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
break;
default:
assert(0);