diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index f6d2d5994bd8f77ddebfd6f01fbe5e66c4fa344b..9fb45d6b7e666f9ac981f8a0f3b8fde372088aef 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -17,6 +17,7 @@ extern "C" { #include "vp9/common/vp9_entropy.h" #include "vp9_rtcd.h" +void vp9_short_idct16x16_add_c(short *input, uint8_t *output, int pitch); } #include "acm_random.h" @@ -269,19 +270,23 @@ TEST(VP9Idct16x16Test, AccuracyCheck) { const int count_test_block = 1000; for (int i = 0; i < count_test_block; ++i) { int16_t in[256], coeff[256]; - int16_t out_c[256]; + uint8_t dst[256], src[256]; double out_r[256]; + for (int j = 0; j < 256; ++j) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + } // Initialize a test block with input range [-255, 255]. for (int j = 0; j < 256; ++j) - in[j] = rnd.Rand8() - rnd.Rand8(); + in[j] = src[j] - dst[j]; reference_16x16_dct_2d(in, out_r); for (int j = 0; j < 256; j++) coeff[j] = round(out_r[j]); - vp9_short_idct16x16_c(coeff, out_c, 32); + vp9_short_idct16x16_add_c(coeff, dst, 16); for (int j = 0; j < 256; ++j) { - const int diff = out_c[j] - in[j]; + const int diff = dst[j] - src[j]; const int error = diff * diff; EXPECT_GE(1, error) << "Error: 16x16 IDCT has error " << error @@ -289,7 +294,7 @@ TEST(VP9Idct16x16Test, AccuracyCheck) { } } } -#if 1 + // we need enable fdct test once we re-do the 16 point fdct. TEST(VP9Fdct16x16Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); @@ -299,18 +304,22 @@ TEST(VP9Fdct16x16Test, AccuracyCheck) { for (int i = 0; i < count_test_block; ++i) { int16_t test_input_block[256]; int16_t test_temp_block[256]; - int16_t test_output_block[256]; + uint8_t dst[256], src[256]; + for (int j = 0; j < 256; ++j) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + } // Initialize a test block with input range [-255, 255]. for (int j = 0; j < 256; ++j) - test_input_block[j] = rnd.Rand8() - rnd.Rand8(); + test_input_block[j] = src[j] - dst[j]; const int pitch = 32; vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch); - vp9_short_idct16x16_c(test_temp_block, test_output_block, pitch); + vp9_short_idct16x16_add_c(test_temp_block, dst, 16); for (int j = 0; j < 256; ++j) { - const int diff = test_input_block[j] - test_output_block[j]; + const int diff = dst[j] - src[j]; const int error = diff * diff; if (max_error < error) max_error = error; @@ -354,6 +363,4 @@ TEST(VP9Fdct16x16Test, CoeffSizeCheck) { } } } -#endif - } // namespace diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 5e6384c7a343891f4cde87a7332f41a532b307af..b166fcbba3b6383e5b63eece97a87baedc7f506d 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -621,10 +621,9 @@ static void idct16_1d(int16_t *input, int16_t *output) { output[15] = step2[0] - step2[15]; } -void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[16 * 16]; int16_t *outptr = out; - const int half_pitch = pitch >> 1; int i, j; int16_t temp_in[16], temp_out[16]; @@ -641,7 +640,8 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); } } @@ -823,8 +823,8 @@ static const transform_2d IHT_16[] = { { iadst16_1d, iadst16_1d } // ADST_ADST = 3 }; -void vp9_short_iht16x16_c(int16_t *input, int16_t *output, - int pitch, int tx_type) { +void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, + int tx_type) { int i, j; int16_t out[16 * 16]; int16_t *outptr = out; @@ -844,37 +844,37 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output, temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); - } + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); } } -void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { - int16_t out[16 * 16]; - int16_t *outptr = out; - const int half_pitch = pitch >> 1; - int i, j; - int16_t temp_in[16], temp_out[16]; - - /* First transform rows. Since all non-zero dct coefficients are in - * upper-left 4x4 area, we only need to calculate first 4 rows here. - */ - vpx_memset(out, 0, sizeof(out)); - for (i = 0; i < 4; ++i) { - idct16_1d(input, outptr); - input += 16; - outptr += 16; - } - - // Then transform columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - idct16_1d(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); - } -} +void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, + int dest_stride) { + int16_t out[16 * 16]; + int16_t *outptr = out; + int i, j; + int16_t temp_in[16], temp_out[16]; + + /* First transform rows. Since all non-zero dct coefficients are in + * upper-left 4x4 area, we only need to calculate first 4 rows here. + */ + vpx_memset(out, 0, sizeof(out)); + for (i = 0; i < 4; ++i) { + idct16_1d(input, outptr); + input += 16; + outptr += 16; + } + // Then transform columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j*16 + i]; + idct16_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * dest_stride + i]); + } +} void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) { int16_t out = dct_const_round_shift(input[0] * cospi_16_64); diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index c45d03084c8f1805ec3ac1b4cbeddf21c2ecf4f5..ea60fbb10f3f968b8df51e42a2fbe808b1844560 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -91,9 +91,6 @@ specialize vp9_add_residual_4x4 sse2 prototype void vp9_add_residual_8x8 "const int16_t *diff, uint8_t *dest, int stride" specialize vp9_add_residual_8x8 sse2 -prototype void vp9_add_residual_16x16 "const int16_t *diff, uint8_t *dest, int stride" -specialize vp9_add_residual_16x16 sse2 - prototype void vp9_add_constant_residual_8x8 "const int16_t diff, uint8_t *dest, int stride" specialize vp9_add_constant_residual_8x8 sse2 @@ -200,11 +197,11 @@ specialize vp9_short_idct10_8x8 sse2 prototype void vp9_short_idct1_8x8 "int16_t *input, int16_t *output" specialize vp9_short_idct1_8x8 -prototype void vp9_short_idct16x16 "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_idct16x16 sse2 +prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_short_idct16x16_add sse2 -prototype void vp9_short_idct10_16x16 "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_idct10_16x16 sse2 +prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_short_idct10_16x16_add sse2 prototype void vp9_short_idct1_16x16 "int16_t *input, int16_t *output" specialize vp9_short_idct1_16x16 @@ -224,8 +221,8 @@ specialize vp9_short_iht8x8 prototype void vp9_short_iht4x4 "int16_t *input, int16_t *output, int pitch, int tx_type" specialize vp9_short_iht4x4 -prototype void vp9_short_iht16x16 "int16_t *input, int16_t *output, int pitch, int tx_type" -specialize vp9_short_iht16x16 +prototype void vp9_short_iht16x16_add "int16_t *input, uint8_t *output, int pitch, int tx_type" +specialize vp9_short_iht16x16_add prototype void vp9_idct4_1d "int16_t *input, int16_t *output" specialize vp9_idct4_1d sse2 diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index e53a937f43258a241ea2b0d0bc655d4ddb4260ce..667da33696cc95c73613aaba2b537393514299b9 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -752,8 +752,17 @@ void vp9_short_idct10_8x8_sse2(int16_t *input, int16_t *output, int pitch) { stp2_10, stp2_13, stp2_11, stp2_12) \ } -void vp9_short_idct16x16_sse2(int16_t *input, int16_t *output, int pitch) { - const int half_pitch = pitch >> 1; +#define RECON_AND_STORE(dest, in_x) \ + { \ + __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ + d0 = _mm_unpacklo_epi8(d0, zero); \ + in_x = _mm_add_epi16(in_x, d0); \ + in_x = _mm_packus_epi16(in_x, in_x); \ + _mm_storel_epi64((__m128i *)(dest), in_x); \ + dest += stride; \ + } + +void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); const __m128i zero = _mm_setzero_si128(); @@ -938,31 +947,30 @@ void vp9_short_idct16x16_sse2(int16_t *input, int16_t *output, int pitch) { in14 = _mm_srai_epi16(in14, 6); in15 = _mm_srai_epi16(in15, 6); - // Store results - _mm_store_si128((__m128i *)output, in0); - _mm_store_si128((__m128i *)(output + half_pitch * 1), in1); - _mm_store_si128((__m128i *)(output + half_pitch * 2), in2); - _mm_store_si128((__m128i *)(output + half_pitch * 3), in3); - _mm_store_si128((__m128i *)(output + half_pitch * 4), in4); - _mm_store_si128((__m128i *)(output + half_pitch * 5), in5); - _mm_store_si128((__m128i *)(output + half_pitch * 6), in6); - _mm_store_si128((__m128i *)(output + half_pitch * 7), in7); - _mm_store_si128((__m128i *)(output + half_pitch * 8), in8); - _mm_store_si128((__m128i *)(output + half_pitch * 9), in9); - _mm_store_si128((__m128i *)(output + half_pitch * 10), in10); - _mm_store_si128((__m128i *)(output + half_pitch * 11), in11); - _mm_store_si128((__m128i *)(output + half_pitch * 12), in12); - _mm_store_si128((__m128i *)(output + half_pitch * 13), in13); - _mm_store_si128((__m128i *)(output + half_pitch * 14), in14); - _mm_store_si128((__m128i *)(output + half_pitch * 15), in15); - - output += 8; + RECON_AND_STORE(dest, in0); + RECON_AND_STORE(dest, in1); + RECON_AND_STORE(dest, in2); + RECON_AND_STORE(dest, in3); + RECON_AND_STORE(dest, in4); + RECON_AND_STORE(dest, in5); + RECON_AND_STORE(dest, in6); + RECON_AND_STORE(dest, in7); + RECON_AND_STORE(dest, in8); + RECON_AND_STORE(dest, in9); + RECON_AND_STORE(dest, in10); + RECON_AND_STORE(dest, in11); + RECON_AND_STORE(dest, in12); + RECON_AND_STORE(dest, in13); + RECON_AND_STORE(dest, in14); + RECON_AND_STORE(dest, in15); + + dest += 8 - (stride * 16); } } } -void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) { - const int half_pitch = pitch >> 1; +void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest, + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); const __m128i zero = _mm_setzero_si128(); @@ -1007,7 +1015,6 @@ void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) { stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; - // 1-D idct. Load input data. in0 = _mm_load_si128((__m128i *)input); in8 = _mm_load_si128((__m128i *)(input + 8 * 1)); @@ -1298,24 +1305,24 @@ void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) { in14 = _mm_srai_epi16(in14, 6); in15 = _mm_srai_epi16(in15, 6); - // Store results - _mm_store_si128((__m128i *)output, in0); - _mm_store_si128((__m128i *)(output + half_pitch * 1), in1); - _mm_store_si128((__m128i *)(output + half_pitch * 2), in2); - _mm_store_si128((__m128i *)(output + half_pitch * 3), in3); - _mm_store_si128((__m128i *)(output + half_pitch * 4), in4); - _mm_store_si128((__m128i *)(output + half_pitch * 5), in5); - _mm_store_si128((__m128i *)(output + half_pitch * 6), in6); - _mm_store_si128((__m128i *)(output + half_pitch * 7), in7); - _mm_store_si128((__m128i *)(output + half_pitch * 8), in8); - _mm_store_si128((__m128i *)(output + half_pitch * 9), in9); - _mm_store_si128((__m128i *)(output + half_pitch * 10), in10); - _mm_store_si128((__m128i *)(output + half_pitch * 11), in11); - _mm_store_si128((__m128i *)(output + half_pitch * 12), in12); - _mm_store_si128((__m128i *)(output + half_pitch * 13), in13); - _mm_store_si128((__m128i *)(output + half_pitch * 14), in14); - _mm_store_si128((__m128i *)(output + half_pitch * 15), in15); - output += 8; + RECON_AND_STORE(dest, in0); + RECON_AND_STORE(dest, in1); + RECON_AND_STORE(dest, in2); + RECON_AND_STORE(dest, in3); + RECON_AND_STORE(dest, in4); + RECON_AND_STORE(dest, in5); + RECON_AND_STORE(dest, in6); + RECON_AND_STORE(dest, in7); + RECON_AND_STORE(dest, in8); + RECON_AND_STORE(dest, in9); + RECON_AND_STORE(dest, in10); + RECON_AND_STORE(dest, in11); + RECON_AND_STORE(dest, in12); + RECON_AND_STORE(dest, in13); + RECON_AND_STORE(dest, in14); + RECON_AND_STORE(dest, in15); + + dest += 8 - (stride * 16); } } @@ -1934,16 +1941,6 @@ void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) { in30 = _mm_srai_epi16(in30, 6); in31 = _mm_srai_epi16(in31, 6); -#define RECON_AND_STORE(dest, in_x) \ - { \ - __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - in_x = _mm_add_epi16(in_x, d0); \ - in_x = _mm_packus_epi16(in_x, in_x); \ - _mm_storel_epi64((__m128i *)(dest), in_x); \ - dest += stride; \ - } - RECON_AND_STORE(dest, in0); RECON_AND_STORE(dest, in1); RECON_AND_STORE(dest, in2); diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index faaee7378134fa208b2779b91d96cdc915692543..bc943fa85dd30823e622338ab9fc7c6f425de429 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -105,10 +105,6 @@ void vp9_add_residual_8x8_c(const int16_t *diff, uint8_t *dest, int stride) { add_residual(diff, dest, stride, 8, 8); } -void vp9_add_residual_16x16_c(const int16_t *diff, uint8_t *dest, int stride) { - add_residual(diff, dest, stride, 16, 16); -} - static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride, int width, int height) { int r, c; @@ -260,19 +256,14 @@ void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, if (tx_type == DCT_DCT) { vp9_idct_add_16x16(input, dest, stride, eob); } else { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); - if (eob > 0) { - vp9_short_iht16x16(input, output, 16, tx_type); + vp9_short_iht16x16_add(input, dest, stride, tx_type); vpx_memset(input, 0, 512); - vp9_add_residual_16x16(output, dest, stride); } } } void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); - /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to separate different cases. */ if (eob) { @@ -288,21 +279,15 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { vp9_add_constant_residual_16x16(out, dest, stride); #if !CONFIG_SCATTERSCAN } else if (eob <= 10) { - // the idct halves ( >> 1) the pitch - vp9_short_idct10_16x16(input, output, 32); - + vp9_short_idct10_16x16_add(input, dest, stride); input[0] = input[1] = input[2] = input[3] = 0; input[16] = input[17] = input[18] = 0; input[32] = input[33] = 0; input[48] = 0; - - vp9_add_residual_16x16(output, dest, stride); #endif } else { - // the idct halves ( >> 1) the pitch - vp9_short_idct16x16(input, output, 16 << 1); + vp9_short_idct16x16_add(input, dest, stride); vpx_memset(input, 0, 512); - vp9_add_residual_16x16(output, dest, stride); } } } diff --git a/vp9/decoder/x86/vp9_dequantize_sse2.c b/vp9/decoder/x86/vp9_dequantize_sse2.c index 38fd5aaa449a0699f1443a01075fe297e0e77d3e..796fc123c7df01fb31b87eedf4ce391503ccdf07 100644 --- a/vp9/decoder/x86/vp9_dequantize_sse2.c +++ b/vp9/decoder/x86/vp9_dequantize_sse2.c @@ -122,65 +122,6 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, uint8_t *dest, int stride) { _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6); } -void vp9_add_residual_16x16_sse2(const int16_t *diff, uint8_t *dest, - int stride) { - const int width = 16; - int i = 4; - const __m128i zero = _mm_setzero_si128(); - - // Diff data - __m128i d0, d1, d2, d3, d4, d5, d6, d7; - __m128i p0, p1, p2, p3, p4, p5, p6, p7; - - do { - d0 = _mm_load_si128((const __m128i *)(diff + 0 * width)); - d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8)); - d2 = _mm_load_si128((const __m128i *)(diff + 1 * width)); - d3 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8)); - d4 = _mm_load_si128((const __m128i *)(diff + 2 * width)); - d5 = _mm_load_si128((const __m128i *)(diff + 2 * width + 8)); - d6 = _mm_load_si128((const __m128i *)(diff + 3 * width)); - d7 = _mm_load_si128((const __m128i *)(diff + 3 * width + 8)); - - // Prediction data. - p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); - p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); - p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); - p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); - - p0 = _mm_unpacklo_epi8(p1, zero); - p1 = _mm_unpackhi_epi8(p1, zero); - p2 = _mm_unpacklo_epi8(p3, zero); - p3 = _mm_unpackhi_epi8(p3, zero); - p4 = _mm_unpacklo_epi8(p5, zero); - p5 = _mm_unpackhi_epi8(p5, zero); - p6 = _mm_unpacklo_epi8(p7, zero); - p7 = _mm_unpackhi_epi8(p7, zero); - - p0 = _mm_add_epi16(p0, d0); - p1 = _mm_add_epi16(p1, d1); - p2 = _mm_add_epi16(p2, d2); - p3 = _mm_add_epi16(p3, d3); - p4 = _mm_add_epi16(p4, d4); - p5 = _mm_add_epi16(p5, d5); - p6 = _mm_add_epi16(p6, d6); - p7 = _mm_add_epi16(p7, d7); - - p0 = _mm_packus_epi16(p0, p1); - p1 = _mm_packus_epi16(p2, p3); - p2 = _mm_packus_epi16(p4, p5); - p3 = _mm_packus_epi16(p6, p7); - - _mm_store_si128((__m128i *)(dest + 0 * stride), p0); - _mm_store_si128((__m128i *)(dest + 1 * stride), p1); - _mm_store_si128((__m128i *)(dest + 2 * stride), p2); - _mm_store_si128((__m128i *)(dest + 3 * stride), p3); - - diff += 4 * width; - dest += 4 * stride; - } while (--i); -} - void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest, int stride) { uint8_t abs_diff; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index db18555f9d1b929c68ee323d257496553e9acb61..221de7426e5ca2e436c09dc7b81b870c5df6f946 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -522,11 +522,12 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, case TX_16X16: tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; if (tx_type == DCT_DCT) { - vp9_short_idct16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), - diff, bw * 2); + vp9_short_idct16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, + block, 16), dst, xd->plane[plane].dst.stride); } else { - vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), - diff, bw, tx_type); + vp9_short_iht16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, + block, 16), dst, xd->plane[plane].dst.stride, + tx_type); } *wip_txfrm_size = 16; break; @@ -605,7 +606,7 @@ void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, foreach_transformed_block_uv(xd, bsize, encode_block, &arg); - if (wip_txfrm_size < 32) + if (wip_txfrm_size < 16) vp9_recon_sbuv(xd, bsize); } @@ -627,13 +628,13 @@ void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, // wip version... will use foreach_transformed_block when done foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg); - if (wip_txfrm_size < 32) + if (wip_txfrm_size < 16) vp9_recon_sby(xd, bsize); wip_txfrm_size = 0; foreach_transformed_block_uv(xd, bsize, encode_block, &arg); - if (wip_txfrm_size < 32) + if (wip_txfrm_size < 16) vp9_recon_sbuv(xd, bsize); #endif }