Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
libvpx
Commits
e0cc52db
Commit
e0cc52db
authored
Jul 26, 2016
by
clang-format
Committed by
James Zern
Aug 02, 2016
Browse files
vp9/encoder: apply clang-format
Change-Id: I45d9fb4013f50766b24363a86365e8063e8954c2
parent
3a04c9c9
Changes
81
Expand all
Hide whitespace changes
Inline
Side-by-side
vp9/encoder/arm/neon/vp9_dct_neon.c
View file @
e0cc52db
...
...
@@ -18,14 +18,13 @@
#include "vpx_dsp/txfm_common.h"
void
vp9_fdct8x8_quant_neon
(
const
int16_t
*
input
,
int
stride
,
int16_t
*
coeff_ptr
,
intptr_t
n_coeffs
,
int
skip_block
,
const
int16_t
*
zbin_ptr
,
const
int16_t
*
round_ptr
,
const
int16_t
*
quant_ptr
,
const
int16_t
*
quant_shift_ptr
,
int16_t
*
qcoeff_ptr
,
int16_t
*
dqcoeff_ptr
,
const
int16_t
*
dequant_ptr
,
uint16_t
*
eob_ptr
,
const
int16_t
*
scan_ptr
,
const
int16_t
*
iscan_ptr
)
{
int16_t
*
coeff_ptr
,
intptr_t
n_coeffs
,
int
skip_block
,
const
int16_t
*
zbin_ptr
,
const
int16_t
*
round_ptr
,
const
int16_t
*
quant_ptr
,
const
int16_t
*
quant_shift_ptr
,
int16_t
*
qcoeff_ptr
,
int16_t
*
dqcoeff_ptr
,
const
int16_t
*
dequant_ptr
,
uint16_t
*
eob_ptr
,
const
int16_t
*
scan_ptr
,
const
int16_t
*
iscan_ptr
)
{
int16_t
temp_buffer
[
64
];
(
void
)
coeff_ptr
;
...
...
vp9/encoder/arm/neon/vp9_quantize_neon.c
View file @
e0cc52db
...
...
@@ -26,8 +26,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const
int16_t
*
round_ptr
,
const
int16_t
*
quant_ptr
,
const
int16_t
*
quant_shift_ptr
,
int16_t
*
qcoeff_ptr
,
int16_t
*
dqcoeff_ptr
,
const
int16_t
*
dequant_ptr
,
uint16_t
*
eob_ptr
,
const
int16_t
*
scan
,
const
int16_t
*
iscan
)
{
uint16_t
*
eob_ptr
,
const
int16_t
*
scan
,
const
int16_t
*
iscan
)
{
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
(
void
)
zbin_ptr
;
...
...
@@ -54,12 +54,12 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const
int16x8_t
v_coeff
=
vld1q_s16
(
&
coeff_ptr
[
0
]);
const
int16x8_t
v_coeff_sign
=
vshrq_n_s16
(
v_coeff
,
15
);
const
int16x8_t
v_tmp
=
vabaq_s16
(
v_round
,
v_coeff
,
v_zero
);
const
int32x4_t
v_tmp_lo
=
vmull_s16
(
vget_low_s16
(
v_tmp
),
vget_low_s16
(
v_quant
));
const
int32x4_t
v_tmp_hi
=
vmull_s16
(
vget_high_s16
(
v_tmp
),
vget_high_s16
(
v_quant
));
const
int16x8_t
v_tmp2
=
vcombine_s16
(
vshrn_n_s32
(
v_tmp_lo
,
16
),
vshrn_n_s32
(
v_tmp_hi
,
16
));
const
int32x4_t
v_tmp_lo
=
vmull_s16
(
vget_low_s16
(
v_tmp
),
vget_low_s16
(
v_quant
));
const
int32x4_t
v_tmp_hi
=
vmull_s16
(
vget_high_s16
(
v_tmp
),
vget_high_s16
(
v_quant
));
const
int16x8_t
v_tmp2
=
vcombine_s16
(
vshrn_n_s32
(
v_tmp_lo
,
16
),
vshrn_n_s32
(
v_tmp_hi
,
16
));
const
uint16x8_t
v_nz_mask
=
vceqq_s16
(
v_tmp2
,
v_zero
);
const
int16x8_t
v_iscan_plus1
=
vaddq_s16
(
v_iscan
,
v_one
);
const
int16x8_t
v_nz_iscan
=
vbslq_s16
(
v_nz_mask
,
v_zero
,
v_iscan_plus1
);
...
...
@@ -79,12 +79,12 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const
int16x8_t
v_coeff
=
vld1q_s16
(
&
coeff_ptr
[
i
]);
const
int16x8_t
v_coeff_sign
=
vshrq_n_s16
(
v_coeff
,
15
);
const
int16x8_t
v_tmp
=
vabaq_s16
(
v_round
,
v_coeff
,
v_zero
);
const
int32x4_t
v_tmp_lo
=
vmull_s16
(
vget_low_s16
(
v_tmp
),
vget_low_s16
(
v_quant
));
const
int32x4_t
v_tmp_hi
=
vmull_s16
(
vget_high_s16
(
v_tmp
),
vget_high_s16
(
v_quant
));
const
int16x8_t
v_tmp2
=
vcombine_s16
(
vshrn_n_s32
(
v_tmp_lo
,
16
),
vshrn_n_s32
(
v_tmp_hi
,
16
));
const
int32x4_t
v_tmp_lo
=
vmull_s16
(
vget_low_s16
(
v_tmp
),
vget_low_s16
(
v_quant
));
const
int32x4_t
v_tmp_hi
=
vmull_s16
(
vget_high_s16
(
v_tmp
),
vget_high_s16
(
v_quant
));
const
int16x8_t
v_tmp2
=
vcombine_s16
(
vshrn_n_s32
(
v_tmp_lo
,
16
),
vshrn_n_s32
(
v_tmp_hi
,
16
));
const
uint16x8_t
v_nz_mask
=
vceqq_s16
(
v_tmp2
,
v_zero
);
const
int16x8_t
v_iscan_plus1
=
vaddq_s16
(
v_iscan
,
v_one
);
const
int16x8_t
v_nz_iscan
=
vbslq_s16
(
v_nz_mask
,
v_zero
,
v_iscan_plus1
);
...
...
@@ -96,9 +96,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
vst1q_s16
(
&
dqcoeff_ptr
[
i
],
v_dqcoeff
);
}
{
const
int16x4_t
v_eobmax_3210
=
vmax_s16
(
vget_low_s16
(
v_eobmax_76543210
),
vget_high_s16
(
v_eobmax_76543210
));
const
int16x4_t
v_eobmax_3210
=
vmax_s16
(
vget_low_s16
(
v_eobmax_76543210
),
vget_high_s16
(
v_eobmax_76543210
));
const
int64x1_t
v_eobmax_xx32
=
vshr_n_s64
(
vreinterpret_s64_s16
(
v_eobmax_3210
),
32
);
const
int16x4_t
v_eobmax_tmp
=
...
...
vp9/encoder/mips/msa/vp9_error_msa.c
View file @
e0cc52db
...
...
@@ -11,74 +11,73 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \
const int16_t *dq_coeff_ptr, \
int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \
sq_coeff_r, sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa( \
const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
BLOCK_ERROR_BLOCKSIZE_MSA
(
16
);
BLOCK_ERROR_BLOCKSIZE_MSA
(
64
);
...
...
@@ -86,25 +85,17 @@ BLOCK_ERROR_BLOCKSIZE_MSA(256);
BLOCK_ERROR_BLOCKSIZE_MSA
(
1024
);
int64_t
vp9_block_error_msa
(
const
tran_low_t
*
coeff_ptr
,
const
tran_low_t
*
dq_coeff_ptr
,
intptr_t
blk_size
,
int64_t
*
ssz
)
{
const
tran_low_t
*
dq_coeff_ptr
,
intptr_t
blk_size
,
int64_t
*
ssz
)
{
int64_t
err
;
const
int16_t
*
coeff
=
(
const
int16_t
*
)
coeff_ptr
;
const
int16_t
*
dq_coeff
=
(
const
int16_t
*
)
dq_coeff_ptr
;
switch
(
blk_size
)
{
case
16
:
err
=
block_error_16size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
case
64
:
err
=
block_error_64size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
case
256
:
err
=
block_error_256size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
case
1024
:
err
=
block_error_1024size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
case
16
:
err
=
block_error_16size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
case
64
:
err
=
block_error_64size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
case
256
:
err
=
block_error_256size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
case
1024
:
err
=
block_error_1024size_msa
(
coeff
,
dq_coeff
,
ssz
);
break
;
default:
err
=
vp9_block_error_c
(
coeff_ptr
,
dq_coeff_ptr
,
blk_size
,
ssz
);
break
;
...
...
vp9/encoder/mips/msa/vp9_fdct16x16_msa.c
View file @
e0cc52db
...
...
@@ -159,8 +159,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
LD_SH8
(
input
,
16
,
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
FDCT_POSTPROC_2V_NEG_H
(
r0
,
r1
);
FDCT_POSTPROC_2V_NEG_H
(
r2
,
r3
);
FDCT_POSTPROC_2V_NEG_H
(
r4
,
r5
);
...
...
@@ -169,8 +169,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out
+=
64
;
LD_SH8
(
input
+
8
,
16
,
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
FDCT_POSTPROC_2V_NEG_H
(
r8
,
r9
);
FDCT_POSTPROC_2V_NEG_H
(
r10
,
r11
);
FDCT_POSTPROC_2V_NEG_H
(
r12
,
r13
);
...
...
@@ -181,8 +181,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
input
+=
128
;
LD_SH8
(
input
,
16
,
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
FDCT_POSTPROC_2V_NEG_H
(
r0
,
r1
);
FDCT_POSTPROC_2V_NEG_H
(
r2
,
r3
);
FDCT_POSTPROC_2V_NEG_H
(
r4
,
r5
);
...
...
@@ -191,8 +191,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out
+=
64
;
LD_SH8
(
input
+
8
,
16
,
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
FDCT_POSTPROC_2V_NEG_H
(
r8
,
r9
);
FDCT_POSTPROC_2V_NEG_H
(
r10
,
r11
);
FDCT_POSTPROC_2V_NEG_H
(
r12
,
r13
);
...
...
@@ -339,24 +339,24 @@ static void fadst16_transpose_msa(int16_t *input, int16_t *out) {
v8i16
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
;
/* load input data */
LD_SH16
(
input
,
8
,
l0
,
l8
,
l1
,
l9
,
l2
,
l10
,
l3
,
l11
,
l4
,
l12
,
l5
,
l13
,
l6
,
l14
,
l7
,
l15
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
LD_SH16
(
input
,
8
,
l0
,
l8
,
l1
,
l9
,
l2
,
l10
,
l3
,
l11
,
l4
,
l12
,
l5
,
l13
,
l6
,
l14
,
l7
,
l15
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
ST_SH8
(
r0
,
r8
,
r1
,
r9
,
r2
,
r10
,
r3
,
r11
,
out
,
8
);
ST_SH8
(
r4
,
r12
,
r5
,
r13
,
r6
,
r14
,
r7
,
r15
,
(
out
+
64
),
8
);
out
+=
16
*
8
;
/* load input data */
input
+=
128
;
LD_SH16
(
input
,
8
,
l0
,
l8
,
l1
,
l9
,
l2
,
l10
,
l3
,
l11
,
l4
,
l12
,
l5
,
l13
,
l6
,
l14
,
l7
,
l15
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
LD_SH16
(
input
,
8
,
l0
,
l8
,
l1
,
l9
,
l2
,
l10
,
l3
,
l11
,
l4
,
l12
,
l5
,
l13
,
l6
,
l14
,
l7
,
l15
);
TRANSPOSE8x8_SH_SH
(
l0
,
l1
,
l2
,
l3
,
l4
,
l5
,
l6
,
l7
,
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
TRANSPOSE8x8_SH_SH
(
l8
,
l9
,
l10
,
l11
,
l12
,
l13
,
l14
,
l15
,
r8
,
r9
,
r10
,
r11
,
r12
,
r13
,
r14
,
r15
);
ST_SH8
(
r0
,
r8
,
r1
,
r9
,
r2
,
r10
,
r3
,
r11
,
out
,
8
);
ST_SH8
(
r4
,
r12
,
r5
,
r13
,
r6
,
r14
,
r7
,
r15
,
(
out
+
64
),
8
);
}
...
...
@@ -371,10 +371,10 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
LD_SH8
(
temp
,
16
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
temp
=
intermediate
+
8
;
LD_SH8
(
temp
,
16
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
);
FDCT_POSTPROC_2V_NEG_H
(
in0
,
in1
);
FDCT_POSTPROC_2V_NEG_H
(
in2
,
in3
);
FDCT_POSTPROC_2V_NEG_H
(
in4
,
in5
);
...
...
@@ -383,29 +383,28 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
FDCT_POSTPROC_2V_NEG_H
(
in10
,
in11
);
FDCT_POSTPROC_2V_NEG_H
(
in12
,
in13
);
FDCT_POSTPROC_2V_NEG_H
(
in14
,
in15
);
BUTTERFLY_16
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
,
tmp0
,
tmp1
,
tmp2
,
tmp3
,
tmp4
,
tmp5
,
tmp6
,
tmp7
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
);
BUTTERFLY_16
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
,
tmp0
,
tmp1
,
tmp2
,
tmp3
,
tmp4
,
tmp5
,
tmp6
,
tmp7
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
);
temp
=
intermediate
;
ST_SH8
(
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
,
temp
,
16
);
FDCT8x16_EVEN
(
tmp0
,
tmp1
,
tmp2
,
tmp3
,
tmp4
,
tmp5
,
tmp6
,
tmp7
,
tmp0
,
tmp1
,
tmp2
,
tmp3
,
tmp4
,
tmp5
,
tmp6
,
tmp7
);
FDCT8x16_EVEN
(
tmp0
,
tmp1
,
tmp2
,
tmp3
,
tmp4
,
tmp5
,
tmp6
,
tmp7
,
tmp0
,
tmp1
,
tmp2
,
tmp3
,
tmp4
,
tmp5
,
tmp6
,
tmp7
);
temp
=
intermediate
;
LD_SH8
(
temp
,
16
,
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
);
FDCT8x16_ODD
(
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
tmp0
,
in0
,
tmp1
,
in1
,
tmp2
,
in2
,
tmp3
,
in3
,
tmp0
,
in0
,
tmp1
,
in1
,
tmp2
,
in2
,
tmp3
,
in3
);
FDCT8x16_ODD
(
in8
,
in9
,
in10
,
in11
,
in12
,
in13
,
in14
,
in15
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
tmp0
,
in0
,
tmp1
,
in1
,
tmp2
,
in2
,
tmp3
,
in3
,
tmp0
,
in0
,
tmp1
,
in1
,
tmp2
,
in2
,
tmp3
,
in3
);
ST_SH8
(
tmp0
,
in0
,
tmp1
,
in1
,
tmp2
,
in2
,
tmp3
,
in3
,
out
,
16
);
TRANSPOSE8x8_SH_SH
(
tmp4
,
in4
,
tmp5
,
in5
,
tmp6
,
in6
,
tmp7
,
in7
,
tmp4
,
in4
,
tmp5
,
in5
,
tmp6
,
in6
,
tmp7
,
in7
);
TRANSPOSE8x8_SH_SH
(
tmp4
,
in4
,
tmp5
,
in5
,
tmp6
,
in6
,
tmp7
,
in7
,
tmp4
,
in4
,
tmp5
,
in5
,
tmp6
,
in6
,
tmp7
,
in7
);
out
=
output
+
8
;
ST_SH8
(
tmp4
,
in4
,
tmp5
,
in5
,
tmp6
,
in6
,
tmp7
,
in7
,
out
,
16
);
}
void
vp9_fht16x16_msa
(
const
int16_t
*
input
,
int16_t
*
output
,
int32_t
stride
,
int32_t
tx_type
)
{
void
vp9_fht16x16_msa
(
const
int16_t
*
input
,
int16_t
*
output
,
int32_t
stride
,
int32_t
tx_type
)
{
DECLARE_ALIGNED
(
32
,
int16_t
,
tmp
[
256
]);
DECLARE_ALIGNED
(
32
,
int16_t
,
trans_buf
[
256
]);
DECLARE_ALIGNED
(
32
,
int16_t
,
tmp_buf
[
128
]);
...
...
@@ -413,35 +412,31 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
int16_t
*
ptmpbuf
=
&
tmp_buf
[
0
];
int16_t
*
trans
=
&
trans_buf
[
0
];
const
int32_t
const_arr
[
29
*
4
]
=
{
52707308
,
52707308
,
52707308
,
52707308
,
-
1072430300
,
-
1072430300
,
-
1072430300
,
-
1072430300
,
795618043
,
795618043
,
795618043
,
795618043
,
-
721080468
,
-
721080468
,
-
721080468
,
-
721080468
,
459094491
,
459094491
,
459094491
,
459094491
,
-
970646691
,
-
970646691
,
-
970646691
,
-
970646691
,
1010963856
,
1010963856
,
1010963856
,
1010963856
,
-
361743294
,
-
361743294
,
-
361743294
,
-
361743294
,
209469125
,
209469125
,
209469125
,
209469125
,
-
1053094788
,
-
1053094788
,
-
1053094788
,
-
1053094788
,
1053160324
,
1053160324
,
1053160324
,
1053160324
,
639644520
,
639644520
,
639644520
,
639644520
,
-
862444000
,
-
862444000
,
-
862444000
,
-
862444000
,
1062144356
,
1062144356
,
1062144356
,
1062144356
,
-
157532337
,
-
157532337
,
-
157532337
,
-
157532337
,
260914709
,
260914709
,
260914709
,
260914709
,
-
1041559667
,
-
1041559667
,
-
1041559667
,
-
1041559667
,
920985831
,
920985831
,
920985831
,
920985831
,
-
551995675
,
-
551995675
,
-
551995675
,
-
551995675
,
596522295
,
596522295
,
596522295
,
596522295
,
892853362
,
892853362
,
892853362
,
892853362
,
-
892787826
,
-
892787826
,
-
892787826
,
-
892787826
,
410925857
,
410925857
,
410925857
,
410925857
,
-
992012162
,
-
992012162
,
-
992012162
,
-
992012162
,
992077698
,
992077698
,
992077698
,
992077698
,
759246145
,
759246145
,
759246145
,
759246145
,
-
759180609
,
-
759180609
,
-
759180609
,
-
759180609
,
-
759222975
,
-
759222975
,
-
759222975
,
-
759222975
,
759288511
,
759288511
,
759288511
,
759288511
};
52707308
,
52707308
,
52707308
,
52707308
,
-
1072430300
,
-
1072430300
,
-
1072430300
,
-
1072430300
,
795618043
,
795618043
,
795618043
,
795618043
,
-
721080468
,
-
721080468
,
-
721080468
,
-
721080468
,
459094491
,
459094491
,
459094491
,
459094491
,
-
970646691
,
-
970646691
,
-
970646691
,
-
970646691
,
1010963856
,
1010963856
,
1010963856
,
1010963856
,
-
361743294
,
-
361743294
,
-
361743294
,
-
361743294
,
209469125
,
209469125
,
209469125
,
209469125
,
-
1053094788
,
-
1053094788
,
-
1053094788
,
-
1053094788
,
1053160324
,
1053160324
,
1053160324
,
1053160324
,
639644520
,
639644520
,
639644520
,
639644520
,
-
862444000
,
-
862444000
,
-
862444000
,
-
862444000
,
1062144356
,
1062144356
,
1062144356
,
1062144356
,
-
157532337
,
-
157532337
,
-
157532337
,
-
157532337
,
260914709
,
260914709
,
260914709
,
260914709
,
-
1041559667
,
-
1041559667
,
-
1041559667
,
-
1041559667
,
920985831
,
920985831
,
920985831
,
920985831
,
-
551995675
,
-
551995675
,
-
551995675
,
-
551995675
,
596522295
,
596522295
,
596522295
,
596522295
,
892853362
,
892853362
,
892853362
,
892853362
,
-
892787826
,
-
892787826
,
-
892787826
,
-
892787826
,
410925857
,
410925857
,
410925857
,
410925857
,
-
992012162
,
-
992012162
,
-
992012162
,
-
992012162
,
992077698
,
992077698
,
992077698
,
992077698
,
759246145
,
759246145
,
759246145
,
759246145
,
-
759180609
,
-
759180609
,
-
759180609
,
-
759180609
,
-
759222975
,
-
759222975
,
-
759222975
,
-
759222975
,
759288511
,
759288511
,
759288511
,
759288511
};
switch
(
tx_type
)
{
case
DCT_DCT
:
...
...
@@ -500,8 +495,6 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
fadst16_transpose_msa
(
tmp
,
output
);
break
;
default:
assert
(
0
);
break
;
default:
assert
(
0
);
break
;
}
}
vp9/encoder/mips/msa/vp9_fdct4x4_msa.c
View file @
e0cc52db
...
...
@@ -86,9 +86,7 @@ void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
TRANSPOSE4x4_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in0
,
in1
,
in2
,
in3
);
VP9_FADST4
(
in0
,
in1
,
in2
,
in3
,
in0
,
in1
,
in2
,
in3
);
break
;
default:
assert
(
0
);
break
;
default:
assert
(
0
);
break
;
}
TRANSPOSE4x4_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in0
,
in1
,
in2
,
in3
);
...
...
vp9/encoder/mips/msa/vp9_fdct8x8_msa.c
View file @
e0cc52db
...
...
@@ -23,44 +23,42 @@ void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
switch
(
tx_type
)
{
case
DCT_DCT
:
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
break
;
case
ADST_DCT
:
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
break
;
case
DCT_ADST
:
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_FDCT8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
break
;
case
ADST_ADST
:
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
break
;
default:
assert
(
0
);
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
VP9_ADST8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
break
;
default:
assert
(
0
);
break
;
}
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
TRANSPOSE8x8_SH_SH
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
SRLI_AVE_S_4V_H
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
);
ST_SH8
(
in0
,
in1
,
in2
,
in3
,
in4
,
in5
,
in6
,
in7
,
output
,
8
);
}