Commit 7f4acf87 authored by Linfeng Zhang

Add vpx_idct16x16_38_add_sse2()

Change-Id: I28150789feadc0b63d2fadc707e48971b41f9898
parent aa1c4cd1
......@@ -717,6 +717,8 @@ const PartialInvTxfmParam sse2_partial_idct_tests[] = {
&wrapper<vpx_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1),
make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
&wrapper<vpx_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1),
make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_38_add_c>,
&wrapper<vpx_idct16x16_38_add_sse2>, TX_16X16, 38, 8, 1),
make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_10_add_c>,
&wrapper<vpx_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1),
make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_1_add_c>,
......
......@@ -591,7 +591,6 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_idct8x8_1_add neon sse2/;
specialize qw/vpx_idct16x16_256_add neon sse2/;
specialize qw/vpx_idct16x16_38_add neon sse2/;
$vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2;
specialize qw/vpx_idct16x16_10_add neon sse2/;
specialize qw/vpx_idct16x16_1_add neon sse2/;
specialize qw/vpx_idct32x32_1024_add neon sse2/;
......
This diff is collapsed.
......@@ -89,6 +89,17 @@ static INLINE void multiplication_and_add_2(const __m128i *const in0,
*res1 = idct_calc_wraplow_sse2(lo, hi, *cst1);
}
// Multiply elements by constants and add them together.
//
// Four-output convenience wrapper around multiplication_and_add_2(). It
// handles two independent input pairs:
//   (in0, in1) with constants (cst0, cst1) -> res0, res1
//   (in2, in3) with constants (cst2, cst3) -> res2, res3
// Every operand is passed by pointer; the inputs and constants are
// pointers-to-const, and the results are delivered through res0..res3 by the
// helper.
//
// NOTE(review): the first pair's results are stored before the second pair's
// inputs are read, so callers that alias res0/res1 with in2/in3 would see the
// updated values -- confirm no caller relies on such aliasing.
static INLINE void multiplication_and_add(
const __m128i *const in0, const __m128i *const in1,
const __m128i *const in2, const __m128i *const in3,
const __m128i *const cst0, const __m128i *const cst1,
const __m128i *const cst2, const __m128i *const cst3, __m128i *const res0,
__m128i *const res1, __m128i *const res2, __m128i *const res3) {
// First pair: in0/in1 combined with cst0 and cst1.
multiplication_and_add_2(in0, in1, cst0, cst1, res0, res1);
// Second pair: in2/in3 combined with cst2 and cst3.
multiplication_and_add_2(in2, in3, cst2, cst3, res2, res3);
}
// Functions to allow 8 bit optimisations to be used when profile 0 is used with
// highbitdepth enabled
static INLINE __m128i load_input_data4(const tran_low_t *data) {
......@@ -348,23 +359,6 @@ static INLINE void butterfly_self(__m128i *x0, __m128i *x1, const __m128i *c0,
*x1 = _mm_packs_epi32(tmp2, tmp3);
}
// Multiply elements by constants and add them together.
//
// Handles two independent input pairs:
//   (in0, in1) with constants cst0 / cst1 -> res0 / res1
//   (in2, in3) with constants cst2 / cst3 -> res2 / res3
// For each pair the 16-bit lanes of the two inputs are interleaved into a
// low-half and a high-half vector, and each (lo, hi) pair is handed to
// idct_calc_wraplow_sse2() once per constant.
// NOTE(review): idct_calc_wraplow_sse2() is defined elsewhere; presumably it
// performs the multiply-add, rounding and packing -- confirm against its
// definition.
static INLINE void multiplication_and_add(
const __m128i *const in0, const __m128i *const in1,
const __m128i *const in2, const __m128i *const in3,
const __m128i *const cst0, const __m128i *const cst1,
const __m128i *const cst2, const __m128i *const cst3, __m128i *const res0,
__m128i *const res1, __m128i *const res2, __m128i *const res3) {
// Interleave the 16-bit lanes of each input pair. All four inputs are read
// here, before any result is stored below.
const __m128i lo_0 = _mm_unpacklo_epi16(*in0, *in1);
const __m128i hi_0 = _mm_unpackhi_epi16(*in0, *in1);
const __m128i lo_1 = _mm_unpacklo_epi16(*in2, *in3);
const __m128i hi_1 = _mm_unpackhi_epi16(*in2, *in3);
// First pair shares lo_0/hi_0 and differs only in the constant applied.
*res0 = idct_calc_wraplow_sse2(lo_0, hi_0, *cst0);
*res1 = idct_calc_wraplow_sse2(lo_0, hi_0, *cst1);
// Second pair shares lo_1/hi_1 and differs only in the constant applied.
*res2 = idct_calc_wraplow_sse2(lo_1, hi_1, *cst2);
*res3 = idct_calc_wraplow_sse2(lo_1, hi_1, *cst3);
}
static INLINE void idct8(const __m128i *const in /*in[8]*/,
__m128i *const out /*out[8]*/) {
const __m128i cp_16_16 = pair_set_epi16(cospi_16_64, cospi_16_64);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment