Commit 563d58ab authored by Linfeng Zhang
Browse files

Rewrite vpx_idct16x16_{10,256}_add_sse2() and add case 38 function

BUG=webm:1412

Change-Id: I945f0fb6807b8948747243794dc7352b959221f7
parent 6624f207
...@@ -651,6 +651,15 @@ const PartialInvTxfmParam sse2_partial_idct_tests[] = { ...@@ -651,6 +651,15 @@ const PartialInvTxfmParam sse2_partial_idct_tests[] = {
make_tuple( make_tuple(
&vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
&highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 12, 2), &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 12, 2),
make_tuple(
&vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_38_add_c>,
&highbd_wrapper<vpx_highbd_idct16x16_38_add_sse2>, TX_16X16, 38, 8, 2),
make_tuple(
&vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_38_add_c>,
&highbd_wrapper<vpx_highbd_idct16x16_38_add_sse2>, TX_16X16, 38, 10, 2),
make_tuple(
&vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_38_add_c>,
&highbd_wrapper<vpx_highbd_idct16x16_38_add_sse2>, TX_16X16, 38, 12, 2),
make_tuple( make_tuple(
&vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>,
&highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 2), &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 2),
......
...@@ -658,7 +658,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { ...@@ -658,7 +658,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_idct8x8_12_add neon sse2 sse4_1/; specialize qw/vpx_highbd_idct8x8_12_add neon sse2 sse4_1/;
specialize qw/vpx_highbd_idct16x16_256_add neon sse2/; specialize qw/vpx_highbd_idct16x16_256_add neon sse2/;
specialize qw/vpx_highbd_idct16x16_38_add neon sse2/; specialize qw/vpx_highbd_idct16x16_38_add neon sse2/;
$vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2;
specialize qw/vpx_highbd_idct16x16_10_add neon sse2/; specialize qw/vpx_highbd_idct16x16_10_add neon sse2/;
specialize qw/vpx_highbd_idct32x32_1024_add neon/; specialize qw/vpx_highbd_idct32x32_1024_add neon/;
specialize qw/vpx_highbd_idct32x32_135_add neon/; specialize qw/vpx_highbd_idct32x32_135_add neon/;
......
This diff is collapsed.
...@@ -115,7 +115,7 @@ static INLINE void highbd_multiplication_and_add_sse2( ...@@ -115,7 +115,7 @@ static INLINE void highbd_multiplication_and_add_sse2(
__m128i *const out0, __m128i *const out1) { __m128i *const out0, __m128i *const out1) {
const __m128i pair_c0 = pair_set_epi32(c0 << 2, 0); const __m128i pair_c0 = pair_set_epi32(c0 << 2, 0);
const __m128i pair_c1 = pair_set_epi32(c1 << 2, 0); const __m128i pair_c1 = pair_set_epi32(c1 << 2, 0);
__m128i temp1[4], temp2[4], sign1[4], sign2[4]; __m128i temp1[4], temp2[4], sign1[2], sign2[2];
abs_extend_64bit_sse2(in0, temp1, sign1); abs_extend_64bit_sse2(in0, temp1, sign1);
abs_extend_64bit_sse2(in1, temp2, sign2); abs_extend_64bit_sse2(in1, temp2, sign2);
...@@ -139,6 +139,29 @@ static INLINE void highbd_multiplication_and_add_sse2( ...@@ -139,6 +139,29 @@ static INLINE void highbd_multiplication_and_add_sse2(
*out1 = pack_4(temp2[0], temp2[1]); *out1 = pack_4(temp2[0], temp2[1]);
} }
// Runs `in` through abs_extend_64bit_sse2() once and reuses the two arrays it
// fills for both constant multiplications: *out0 receives the c0 result and
// *out1 receives the c1 result.
// Note: c0 and c1 must be non negative.
static INLINE void highbd_multiplication_sse2(const __m128i in, const int c0,
                                              const int c1,
                                              __m128i *const out0,
                                              __m128i *const out1) {
  __m128i magnitude[2];
  __m128i sign_bits[2];

  // Presumably splits the input into 64-bit magnitudes plus sign info
  // (inferred from the helper's name; confirm against its definition).
  abs_extend_64bit_sse2(in, magnitude, sign_bits);

  // Store order matches the original: out0 first, then out1.
  *out0 = multiplication_round_shift_sse2(magnitude, sign_bits, c0);
  *out1 = multiplication_round_shift_sse2(magnitude, sign_bits, c1);
}
// Variant of the two-constant multiplication in which the constants are
// crossed: *out0 is produced from c1 through
// multiplication_neg_round_shift_sse2(), while *out1 is produced from c0
// through multiplication_round_shift_sse2().
// Note: c0 and c1 must be non negative.
static INLINE void highbd_multiplication_neg_sse2(const __m128i in,
                                                  const int c0, const int c1,
                                                  __m128i *const out0,
                                                  __m128i *const out1) {
  __m128i magnitude[2];
  __m128i sign_bits[2];

  // One decomposition of the input is shared by both products.
  abs_extend_64bit_sse2(in, magnitude, sign_bits);

  // c1 (not c0) feeds out0 here, and it goes through the "neg" helper.
  *out0 = multiplication_neg_round_shift_sse2(magnitude, sign_bits, c1);
  *out1 = multiplication_round_shift_sse2(magnitude, sign_bits, c0);
}
static INLINE void highbd_idct8_stage4(const __m128i *const in, static INLINE void highbd_idct8_stage4(const __m128i *const in,
__m128i *const out) { __m128i *const out) {
out[0] = _mm_add_epi32(in[0], in[7]); out[0] = _mm_add_epi32(in[0], in[7]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment