diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index d819a3d69707c125b04b2e6044483f5a9ffc0fd7..fcc91c2fcb8d090de760ec1caf3c02285de42a96 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -494,8 +494,23 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { return tx_type; } +#define USE_ADST_FOR_I16X16_8X8 0 +#define USE_ADST_FOR_I16X16_4X4 0 +#define USE_ADST_FOR_I8X8_4X4 1 +#define USE_ADST_PERIPHERY_ONLY 1 + static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { + // TODO(debargha): explore different patterns for ADST usage when blocksize + // is smaller than the prediction size TX_TYPE tx_type = DCT_DCT; + int ib = (int)(b - xd->block); + if (ib >= 16) + return tx_type; +#if CONFIG_SUPERBLOCKS + // TODO(rbultje, debargha): Explore ADST usage for superblocks + if (xd->mode_info_context->mbmi.encoded_as_sb) + return tx_type; +#endif if (xd->mode_info_context->mbmi.mode == B_PRED && xd->q_index < ACTIVE_HT) { tx_type = txfm_map( @@ -503,28 +518,92 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { b->bmi.as_mode.first == B_CONTEXT_PRED ? b->bmi.as_mode.context : #endif b->bmi.as_mode.first); + } else if (xd->mode_info_context->mbmi.mode == I8X8_PRED && + xd->q_index < ACTIVE_HT) { +#if USE_ADST_FOR_I8X8_4X4 +#if USE_ADST_PERIPHERY_ONLY + // Use ADST for periphery blocks only + int ic = (ib & 10); + b += ic - ib; + tx_type = (ic != 10) ? + txfm_map(pred_mode_conv((MB_PREDICTION_MODE)b->bmi.as_mode.first)) : + DCT_DCT; +#else + // Use ADST + tx_type = txfm_map(pred_mode_conv( + (MB_PREDICTION_MODE)b->bmi.as_mode.first)); +#endif +#else + // Use 2D DCT + tx_type = DCT_DCT; +#endif + } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED && + xd->q_index < ACTIVE_HT) { +#if USE_ADST_FOR_I16X16_4X4 +#if USE_ADST_PERIPHERY_ONLY + // Use ADST for periphery blocks only + tx_type = (ib < 4 || ((ib & 3) == 0)) ? 
+ txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT; +#else + // Use ADST + tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); +#endif +#else + // Use 2D DCT + tx_type = DCT_DCT; +#endif } return tx_type; } static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) { + // TODO(debargha): explore different patterns for ADST usage when blocksize + // is smaller than the prediction size TX_TYPE tx_type = DCT_DCT; + int ib = (int)(b - xd->block); + if (ib >= 16) + return tx_type; +#if CONFIG_SUPERBLOCKS + // TODO(rbultje, debargha): Explore ADST usage for superblocks + if (xd->mode_info_context->mbmi.encoded_as_sb) + return tx_type; +#endif if (xd->mode_info_context->mbmi.mode == I8X8_PRED && xd->q_index < ACTIVE_HT8) { // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged // or the relationship otherwise modified to address this type conversion. tx_type = txfm_map(pred_mode_conv( - (MB_PREDICTION_MODE)b->bmi.as_mode.first)); + (MB_PREDICTION_MODE)b->bmi.as_mode.first)); + } else if (xd->mode_info_context->mbmi.mode < I8X8_PRED && + xd->q_index < ACTIVE_HT8) { +#if USE_ADST_FOR_I8X8_4X4 +#if USE_ADST_PERIPHERY_ONLY + // Use ADST for periphery blocks only + tx_type = (ib != 10) ? 
+ txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT; +#else + // Use ADST + tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); +#endif +#else + // Use 2D DCT + tx_type = DCT_DCT; +#endif } return tx_type; } static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) { TX_TYPE tx_type = DCT_DCT; - if (xd->mode_info_context->mbmi.mode < I8X8_PRED && + int ib = (int)(b - xd->block); + if (ib >= 16) + return tx_type; #if CONFIG_SUPERBLOCKS - !xd->mode_info_context->mbmi.encoded_as_sb && + // TODO(rbultje, debargha): Explore ADST usage for superblocks + if (xd->mode_info_context->mbmi.encoded_as_sb) + return tx_type; #endif + if (xd->mode_info_context->mbmi.mode < I8X8_PRED && xd->q_index < ACTIVE_HT16) { tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); } @@ -549,6 +628,16 @@ static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; } +static int get_2nd_order_usage(const MACROBLOCKD *xd) { + int has_2nd_order = (xd->mode_info_context->mbmi.mode != SPLITMV && + xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.txfm_size != TX_16X16); + if (has_2nd_order) + has_2nd_order = (get_tx_type(xd, xd->block) == DCT_DCT); + return has_2nd_order; +} + extern void vp9_build_block_doffsets(MACROBLOCKD *xd); extern void vp9_setup_block_dptrs(MACROBLOCKD *xd); diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index b79977385cb22f93f4fde32636371054cde3f234..627a62bf74c942e2f23574f85df5dbb15ba29cb3 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -40,20 +40,28 @@ void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) { void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { int i; BLOCKD *blockd = xd->block; + int has_2nd_order = get_2nd_order_usage(xd); - if (xd->mode_info_context->mbmi.mode != SPLITMV) { + if (has_2nd_order) { /* do 2nd order transform on 
the dc block */ vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff); recon_dcblock(xd); } for (i = 0; i < 16; i++) { - vp9_inverse_transform_b_4x4(xd, i, 32); + TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]); + if (tx_type != DCT_DCT) { + vp9_ihtllm_c(xd->block[i].dqcoeff, xd->block[i].diff, 32, + tx_type, 4); + } else { + vp9_inverse_transform_b_4x4(xd, i, 32); + } } } void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) { int i; + for (i = 16; i < 24; i++) { vp9_inverse_transform_b_4x4(xd, i, 16); } @@ -72,20 +80,31 @@ void vp9_inverse_transform_b_8x8(short *input_dqcoeff, short *output_coeff, void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { int i; BLOCKD *blockd = xd->block; + int has_2nd_order = get_2nd_order_usage(xd); - if (xd->mode_info_context->mbmi.mode != SPLITMV) { + if (has_2nd_order) { // do 2nd order transform on the dc block vp9_short_ihaar2x2(blockd[24].dqcoeff, blockd[24].diff, 8); recon_dcblock_8x8(xd); // need to change for 8x8 } for (i = 0; i < 9; i += 8) { - vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], - &blockd[i].diff[0], 32); + TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]); + if (tx_type != DCT_DCT) { + vp9_ihtllm_c(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8); + } else { + vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], + &blockd[i].diff[0], 32); + } } for (i = 2; i < 11; i += 8) { - vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0], - &blockd[i].diff[0], 32); + TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]); + if (tx_type != DCT_DCT) { + vp9_ihtllm_c(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8); + } else { + vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0], + &blockd[i].diff[0], 32); + } } } @@ -110,8 +129,14 @@ void vp9_inverse_transform_b_16x16(short *input_dqcoeff, } void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) { - vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0], - &xd->block[0].diff[0], 32); + BLOCKD *bd = &xd->block[0]; + TX_TYPE tx_type = 
get_tx_type_16x16(xd, bd); + if (tx_type != DCT_DCT) { + vp9_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16); + } else { + vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0], + &xd->block[0].diff[0], 32); + } } void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index dce49304d4bb5db8caf8848860e75a6a12fb34b7..bf60630e38aba33e3d5f1307e09557591647e5d3 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -278,7 +278,7 @@ void vp9_find_mv_refs( &index, c_refmv, ref_weight); // If there is a second valid mv then add it as well. - if (c2_ref_frame != INTRA_FRAME) { + if (c2_ref_frame > INTRA_FRAME) { scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); ref_weight = ref_distance_weight[i] + ((c2_ref_frame == ref_frame) << 4); @@ -304,7 +304,7 @@ void vp9_find_mv_refs( &index, c_refmv, ref_weight); // If there is a second valid mv then add it as well. - if (c2_ref_frame != INTRA_FRAME) { + if (c2_ref_frame > INTRA_FRAME) { scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); ref_weight = ref_distance_weight[i] + ((c2_ref_frame == ref_frame) << 4); @@ -337,7 +337,7 @@ void vp9_find_mv_refs( &index, c_refmv, ref_weight); // If there is a second valid mv then add it as well. 
- if (c2_ref_frame != INTRA_FRAME) { + if (c2_ref_frame > INTRA_FRAME) { scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); ref_weight = ref_distance_weight[i] + ((c2_ref_frame == ref_frame) << 4); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 5c5d4abbb9fb7970f9719b49218f82d6bafc42db..d2f7e701af02daf4aab85ba42104a525b300fffb 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -382,8 +382,8 @@ static void combine_interintra(MB_PREDICTION_MODE mode, // TODO(debargha): Explore different ways of combining predictors // or designing the tables below static const int scale_bits = 8; - static const int scale_max = 1 << scale_bits; - static const int scale_round = (1 << scale_bits) - 1; + static const int scale_max = 256; // 1 << scale_bits; + static const int scale_round = 127; // (1 << (scale_bits - 1)); // This table is a function A + B*exp(-kx), where x is hor. index static const int weights1d[32] = { 128, 122, 116, 111, 107, 103, 99, 96, diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 6842a150db0f9eff55802aa835a60c4e83fe32e7..e037d0ddfd4d8b58d061e92aab003a1a7b63b646 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -28,6 +28,10 @@ #ifdef DEBUG_DEC_MV int dec_mvcount = 0; #endif +// #define DEC_DEBUG +#ifdef DEC_DEBUG +extern int dec_debug; +#endif static int read_bmode(vp9_reader *bc, const vp9_prob *p) { B_PREDICTION_MODE m = treed_read(bc, vp9_bmode_tree, p); @@ -775,6 +779,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; +#ifdef DEC_DEBUG + if (dec_debug) + printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row, + xd->mode_info_context->mbmi.mv[0].as_mv.col); +#endif vp9_find_mv_refs(xd, mi, prev_mi, ref_frame, mbmi->ref_mvs[ref_frame], cm->ref_frame_sign_bias); 
@@ -787,6 +796,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, vp9_mv_ref_probs(&pbi->common, mv_ref_p, mbmi->mb_mode_context[ref_frame]); +#ifdef DEC_DEBUG + if (dec_debug) + printf("[D %d %d] %d %d %d %d\n", ref_frame, + mbmi->mb_mode_context[ref_frame], + mv_ref_p[0], mv_ref_p[1], mv_ref_p[2], mv_ref_p[3]); +#endif } // Is the segment level mode feature enabled for this segment diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 12865bfbb4775b72e6f4a21e286f96b663cab241..de3569ff40b8758644feeee54ebf409b720d5204 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -36,9 +36,13 @@ #include <assert.h> #include <stdio.h> - #define COEFCOUNT_TESTING +// #define DEC_DEBUG +#ifdef DEC_DEBUG +int dec_debug = 0; +#endif + static int merge_index(int v, int n, int modulus) { int max1 = (n - 1 - modulus / 2) / modulus + 1; if (v < max1) v = v * modulus + modulus / 2; @@ -225,6 +229,333 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { } } +static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, + BOOL_DECODER* const bc) { + BLOCKD *bd = &xd->block[0]; + TX_TYPE tx_type = get_tx_type_16x16(xd, bd); + assert(get_2nd_order_usage(xd) == 0); +#ifdef DEC_DEBUG + if (dec_debug) { + int i; + printf("\n"); + printf("qcoeff 16x16\n"); + for (i = 0; i < 400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i % 16 == 15) printf("\n"); + } + printf("\n"); + printf("predictor\n"); + for (i = 0; i < 400; i++) { + printf("%3d ", xd->predictor[i]); + if (i % 16 == 15) printf("\n"); + } + } +#endif + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff, + xd->block[0].dequant, xd->predictor, + xd->dst.y_buffer, 16, xd->dst.y_stride); + } else { + vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + 16, xd->dst.y_stride, xd->eobs[0]); + } + vp9_dequant_idct_add_uv_block_8x8( + xd->qcoeff + 16 * 16, 
xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 16, xd); +} + +static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, + BOOL_DECODER* const bc) { + // First do Y + // if the first one is DCT_DCT assume all the rest are as well + TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); +#ifdef DEC_DEBUG + if (dec_debug) { + int i; + printf("\n"); + printf("qcoeff 8x8\n"); + for (i = 0; i < 400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i % 16 == 15) printf("\n"); + } + } +#endif + if (tx_type != DCT_DCT || xd->mode_info_context->mbmi.mode == I8X8_PRED) { + int i; + assert(get_2nd_order_usage(xd) == 0); + for (i = 0; i < 4; i++) { + int ib = vp9_i8x8_block[i]; + const int iblock[4] = {0, 1, 4, 5}; + int idx = (ib & 0x02) ? (ib + 2) : ib; + short *q = xd->block[idx].qcoeff; + short *dq = xd->block[0].dequant; + unsigned char *pre = xd->block[ib].predictor; + unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; + int stride = xd->dst.y_stride; + BLOCKD *b = &xd->block[ib]; + if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { + int i8x8mode = b->bmi.as_mode.first; + vp9_intra8x8_predict(b, i8x8mode, b->predictor); + } + tx_type = get_tx_type_8x8(xd, &xd->block[ib]); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride); + } else { + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, + 0, xd->eobs[idx]); + } + } + } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { + assert(get_2nd_order_usage(xd) == 0); + vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, + xd->block[0].dequant, + xd->predictor, + xd->dst.y_buffer, + xd->dst.y_stride, + xd->eobs, xd); + } else { + assert(get_2nd_order_usage(xd) == 1); + BLOCKD *b = &xd->block[24]; + vp9_dequantize_b_2x2(b); + vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); + ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; 
+ ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff, + xd->block[0].dequant, + xd->predictor, + xd->dst.y_buffer, + xd->dst.y_stride, + xd->eobs, + xd->block[24].diff, + xd); + } + + // Now do UV + if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { + int i; + for (i = 0; i < 4; i++) { + int ib = vp9_i8x8_block[i]; + BLOCKD *b = &xd->block[ib]; + int i8x8mode = b->bmi.as_mode.first; + b = &xd->block[16 + i]; + vp9_intra_uv4x4_predict(&xd->block[16 + i], i8x8mode, b->predictor); + pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride); + b = &xd->block[20 + i]; + vp9_intra_uv4x4_predict(&xd->block[20 + i], i8x8mode, b->predictor); + pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride); + } + } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { + pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 16); + } else { + vp9_dequant_idct_add_uv_block_8x8 + (xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 16, xd); + } +#ifdef DEC_DEBUG + if (dec_debug) { + int i; + printf("\n"); + printf("predictor\n"); + for (i = 0; i < 384; i++) { + printf("%3d ", xd->predictor[i]); + if (i % 16 == 15) printf("\n"); + } + } +#endif +} + +static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, + BOOL_DECODER* const bc) { + TX_TYPE tx_type; + int i, eobtotal = 0; + MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; + if (mode == I8X8_PRED) { + assert(get_2nd_order_usage(xd) == 0); + for (i = 0; i < 4; i++) { + int ib = vp9_i8x8_block[i]; + const int iblock[4] = {0, 1, 4, 5}; + int j; + int i8x8mode; + BLOCKD *b; + int idx = (ib & 0x02) ? 
(ib + 2) : ib; + short *q = xd->block[idx].qcoeff; + short *dq = xd->block[0].dequant; + unsigned char *pre = xd->block[ib].predictor; + unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; + int stride = xd->dst.y_stride; + b = &xd->block[ib]; + i8x8mode = b->bmi.as_mode.first; + vp9_intra8x8_predict(b, i8x8mode, b->predictor); + for (j = 0; j < 4; j++) { + b = &xd->block[ib + iblock[j]]; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, + b->dst_stride); + } else { + vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); + } + } + b = &xd->block[16 + i]; + vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor); + pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride); + b = &xd->block[20 + i]; + vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor); + pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride); + } + } else if (mode == B_PRED) { + assert(get_2nd_order_usage(xd) == 0); + for (i = 0; i < 16; i++) { + int b_mode; +#if CONFIG_COMP_INTRA_PRED + int b_mode2; +#endif + BLOCKD *b = &xd->block[i]; + b_mode = xd->mode_info_context->bmi[i].as_mode.first; +#if CONFIG_NEWBINTRAMODES + xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context = + vp9_find_bpred_context(b); +#endif + if (!xd->mode_info_context->mbmi.mb_skip_coeff) + eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i); +#if CONFIG_COMP_INTRA_PRED + b_mode2 = xd->mode_info_context->bmi[i].as_mode.second; + + if (b_mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { +#endif + vp9_intra4x4_predict(b, b_mode, b->predictor); +#if CONFIG_COMP_INTRA_PRED + } else { + vp9_comp_intra4x4_predict(b, b_mode, b_mode2, b->predictor); + } +#endif + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + 
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); + } else { + vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); + } + xd->above_context->y2 = 1; + xd->left_context->y2 = 1; + } + if (!xd->mode_info_context->mbmi.mb_skip_coeff) { + vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); + } + vp9_build_intra_predictors_mbuv(xd); + pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->block[16].dequant, + xd->predictor + 16 * 16, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, + xd->eobs + 16); + } else if (mode == SPLITMV) { + assert(get_2nd_order_usage(xd) == 0); + pbi->idct_add_y_block(xd->qcoeff, + xd->block[0].dequant, + xd->predictor, + xd->dst.y_buffer, + xd->dst.y_stride, + xd->eobs); + pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->block[16].dequant, + xd->predictor + 16 * 16, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, + xd->eobs + 16); + } else { +#ifdef DEC_DEBUG + if (dec_debug) { + int i; + printf("\n"); + printf("qcoeff 4x4\n"); + for (i = 0; i < 400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i % 16 == 15) printf("\n"); + } + printf("\n"); + printf("predictor\n"); + for (i = 0; i < 400; i++) { + printf("%3d ", xd->predictor[i]); + if (i % 16 == 15) printf("\n"); + } + } +#endif + tx_type = get_tx_type_4x4(xd, &xd->block[0]); + if (tx_type != DCT_DCT) { + assert(get_2nd_order_usage(xd) == 0); + for (i = 0; i < 16; i++) { + BLOCKD *b = &xd->block[i]; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, + b->dst_stride); + } else { + vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); + } + } + } else { + assert(get_2nd_order_usage(xd) == 1); + BLOCKD *b = &xd->block[24]; + vp9_dequantize_b(b); + if (xd->eobs[24] > 1) { + 
vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + } else { + xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + } + vp9_dequantize_b(b); + pbi->dc_idct_add_y_block(xd->qcoeff, + xd->block[0].dequant, + xd->predictor, + xd->dst.y_buffer, + xd->dst.y_stride, + xd->eobs, + xd->block[24].diff); + } + pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->block[16].dequant, + xd->predictor + 16 * 16, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, + xd->eobs + 16); + } +} + #if CONFIG_SUPERBLOCKS static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, unsigned int mb_col, @@ -295,40 +626,69 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, } if (tx_size == TX_16X16) { - vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); + TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_16x16_c( + tx_type, xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->dst.y_stride); + } else { + vp9_dequant_idct_add_16x16( + xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); + } vp9_dequant_idct_add_uv_block_8x8_inplace_c(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, xd->dst.v_buffer + y_idx * 8 * 
xd->dst.uv_stride + x_idx * 8, xd->dst.uv_stride, xd->eobs + 16, xd); } else if (tx_size == TX_8X8) { - vp9_dequantize_b_2x2(b); - vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); - ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(xd->qcoeff, - xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); - vp9_dequant_idct_add_uv_block_8x8_inplace_c(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); - } else { - vp9_dequantize_b(b); - if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; + TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); + if (tx_type != DCT_DCT) { + int i; + for (i = 0; i < 4; i++) { + int ib = vp9_i8x8_block[i]; + const int iblock[4] = {0, 1, 4, 5}; + const int ioffset[4] = {0, 1, 0, 1}; + int idx = (ib & 0x02) ? 
(ib + 2) : ib; + int i8x8mode = -1; + short *q = xd->block[idx].qcoeff; + short *dq = xd->block[0].dequant; + unsigned char *pre = xd->block[ib].predictor; + unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; + int stride = xd->dst.y_stride; + BLOCKD *b = &xd->block[ib]; + tx_type = get_tx_type_8x8(xd, &xd->block[ib]); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_8x8_c( + tx_type, q, dq, + xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + + x_idx * 16 + (i & 1) * 8, + xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + + x_idx * 16 + (i & 1) * 8, + stride, stride); + } else { + vp9_dequant_idct_add_8x8_c( + q, dq, + xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + + x_idx * 16 + (i & 1) * 8, + xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + + x_idx * 16 + (i & 1) * 8, + stride, stride, 0, b->eob); + } + vp9_dequant_idct_add_uv_block_8x8_inplace_c( + xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); + } + } else { + vp9_dequantize_b_2x2(b); + vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); + ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; ((int *)b->qcoeff)[3] = 0; @@ -336,17 +696,63 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[5] = 0; ((int *)b->qcoeff)[6] = 0; ((int *)b->qcoeff)[7] = 0; + vp9_dequant_dc_idct_add_y_block_8x8_inplace_c( + xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + vp9_dequant_idct_add_uv_block_8x8_inplace_c( + xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + 
x_idx * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); + } + } else { + TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]); + if (tx_type != DCT_DCT) { + for (i = 0; i < 16; i++) { + BLOCKD *b = &xd->block[i]; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_ht_dequant_idct_add_c( + tx_type, b->qcoeff, b->dequant, + xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + + x_idx * 16 + (i & 3) * 4, + xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + + x_idx * 16 + (i & 3) * 4, + xd->dst.y_stride, xd->dst.y_stride); + } else { + vp9_dequant_idct_add_c( + b->qcoeff, b->dequant, + xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + + x_idx * 16 + (i & 3) * 4, + xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + + x_idx * 16 + (i & 3) * 4, + xd->dst.y_stride, xd->dst.y_stride); + } + } } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; + vp9_dequantize_b(b); + if (xd->eobs[24] > 1) { + vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + } else { + xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + } + vp9_dequant_dc_idct_add_y_block_4x4_inplace_c( + xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); } - - vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(xd->qcoeff, - xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); - vp9_dequant_idct_add_uv_block_4x4_inplace_c(xd->qcoeff + 16 * 16, - xd->block[16].dequant, + vp9_dequant_idct_add_uv_block_4x4_inplace_c( + xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->dst.u_buffer + 
y_idx * 8 * xd->dst.uv_stride + x_idx * 8, xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, xd->dst.uv_stride, xd->eobs + 16, xd); @@ -406,6 +812,10 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, skip_recon_mb(pbi, xd); return; } +#ifdef DEC_DEBUG + if (dec_debug) + printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size); +#endif // moved to be performed before detokenization // if (xd->segmentation_enabled) @@ -423,168 +833,41 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_build_inter_predictors_mb(xd); } - /* dequantization and idct */ - if (mode == I8X8_PRED) { - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - const int iblock[4] = {0, 1, 4, 5}; - int j; - int i8x8mode; - BLOCKD *b; - - int idx = (ib & 0x02) ? (ib + 2) : ib; - - short *q = xd->block[idx].qcoeff; - short *dq = xd->block[0].dequant; - unsigned char *pre = xd->block[ib].predictor; - unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; - int stride = xd->dst.y_stride; - - b = &xd->block[ib]; - i8x8mode = b->bmi.as_mode.first; - vp9_intra8x8_predict(b, i8x8mode, b->predictor); - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - tx_type = get_tx_type(xd, &xd->block[idx]); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_8x8_c(tx_type, - q, dq, pre, dst, 16, stride); - } else { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, - xd->eobs[idx]); - } - q += 64; - } else { - for (j = 0; j < 4; j++) { - b = &xd->block[ib + iblock[j]]; - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - } - } - b = &xd->block[16 + i]; - vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); - b = &xd->block[20 + i]; - vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, 
b->dst_stride); - } - } else if (mode == B_PRED) { + if (tx_size == TX_16X16) { + decode_16x16(pbi, xd, bc); + } else if (tx_size == TX_8X8) { + decode_8x8(pbi, xd, bc); + } else { + decode_4x4(pbi, xd, bc); + } +#ifdef DEC_DEBUG + if (dec_debug) { + int i, j; + printf("\n"); + printf("final y\n"); for (i = 0; i < 16; i++) { - int b_mode; -#if CONFIG_COMP_INTRA_PRED - int b_mode2; -#endif - BLOCKD *b = &xd->block[i]; - b_mode = xd->mode_info_context->bmi[i].as_mode.first; -#if CONFIG_NEWBINTRAMODES - xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context = - vp9_find_bpred_context(b); -#endif - if (!xd->mode_info_context->mbmi.mb_skip_coeff) - eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i); -#if CONFIG_COMP_INTRA_PRED - b_mode2 = xd->mode_info_context->bmi[i].as_mode.second; - - if (b_mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { -#endif - vp9_intra4x4_predict(b, b_mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra4x4_predict(b, b_mode, b_mode2, b->predictor); - } -#endif - - tx_type = get_tx_type(xd, b); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - } - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; + for (j = 0; j < 16; j++) + printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]); + printf("\n"); } - if (!xd->mode_info_context->mbmi.mb_skip_coeff) - vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); - } else if (mode == SPLITMV) { - if (tx_size == TX_8X8) { - vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs, xd); - } else { - pbi->idct_add_y_block(xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); + printf("\n"); + printf("final u\n"); + for (i 
= 0; i < 8; i++) { + for (j = 0; j < 8; j++) + printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]); + printf("\n"); } } else { - BLOCKD *b = &xd->block[24]; - - if (tx_size == TX_16X16) { - BLOCKD *bd = &xd->block[0]; - tx_type = get_tx_type(xd, bd); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff, - xd->block[0].dequant, xd->predictor, - xd->dst.y_buffer, 16, xd->dst.y_stride); - } else { - vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - 16, xd->dst.y_stride, xd->eobs[0]); - } - } else if (tx_size == TX_8X8) { - vp9_dequantize_b_2x2(b); - vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); - ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff, - xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); - } else { - vp9_dequantize_b(b); - if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - } else { - xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - } - - pbi->dc_idct_add_y_block(xd->qcoeff, xd->block[0].dequant, xd->predictor, - xd->dst.y_buffer, xd->dst.y_stride, xd->eobs, - xd->block[24].diff); + printf("\n"); + printf("final v\n"); + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) + printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]); + printf("\n"); } + fflush(stdout); } - - if ((tx_size == TX_8X8 && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - 
xd->mode_info_context->mbmi.mode != SPLITMV) - || tx_size == TX_16X16 - ) - vp9_dequant_idct_add_uv_block_8x8 - (xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16, xd); - else if (xd->mode_info_context->mbmi.mode != I8X8_PRED) - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16); +#endif } @@ -683,7 +966,17 @@ decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd, #if CONFIG_SUPERBLOCKS } #endif - +#ifdef DEC_DEBUG + dec_debug = (pbi->common.current_video_frame == 73 && + mb_row == 4 && mb_col == 13); + if (dec_debug) +#if CONFIG_SUPERBLOCKS + printf("Enter Debug %d %d sb %d\n", mb_row, mb_col, + mi->mbmi.encoded_as_sb); +#else + printf("Enter Debug %d %d\n", mb_row, mb_col); +#endif +#endif xd->up_available = (mb_row != 0); xd->left_available = (mb_col != 0); @@ -898,51 +1191,6 @@ static void init_frame(VP9D_COMP *pbi) { } -#if 0 -static void read_coef_probs2(VP9D_COMP *pbi) { - const vp9_prob grpupd = 192; - int i, j, k, l; - vp9_reader *const bc = &pbi->bc; - VP9_COMMON *const pc = &pbi->common; - for (l = 0; l < ENTROPY_NODES; l++) { - if (vp9_read(bc, grpupd)) { - // printf("Decoding %d\n", l); - for (i = 0; i < BLOCK_TYPES; i++) - for (j = !i; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - if (k >= 3 && ((i == 0 && j == 1) || - (i > 0 && j == 0))) - continue; - { - vp9_prob *const p = pc->fc.coef_probs [i][j][k] + l; - int u = vp9_read(bc, COEF_UPDATE_PROB); - if (u) *p = read_prob_diff_update(bc, *p); - } - } - } - } - if (pbi->common.txfm_mode == ALLOW_8X8) { - for (l = 0; l < ENTROPY_NODES; l++) { - if (vp9_read(bc, grpupd)) { - for (i = 0; i < BLOCK_TYPES_8X8; i++) - for (j = !i; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - if (k >= 3 && ((i == 0 && j == 1) || - (i > 0 && j == 0))) - 
continue; - { - vp9_prob *const p = pc->fc.coef_probs_8x8 [i][j][k] + l; - - int u = vp9_read(bc, COEF_UPDATE_PROB_8X8); - if (u) *p = read_prob_diff_update(bc, *p); - } - } - } - } - } -} -#endif - static void read_coef_probs_common( BOOL_DECODER* const bc, vp9_prob coef_probs[BLOCK_TYPES][COEF_BANDS] @@ -1067,6 +1315,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { } } } +#ifdef DEC_DEBUG + printf("Decode frame %d\n", pc->current_video_frame); +#endif if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) || pc->Width == 0 || pc->Height == 0) { diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 633e3a4bfc9f7ae0494e42b22cc8d791a3e287ac..b119418e8a42314d877f7fbd5139cbf95c247de6 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -14,10 +14,6 @@ #include "vpx_mem/vpx_mem.h" #include "vp9/decoder/vp9_onyxd_int.h" -#ifdef DEC_DEBUG -extern int dec_debug; -#endif - static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride, int width, int height) { int r, c; @@ -201,16 +197,6 @@ void vp9_dequantize_b_2x2_c(BLOCKD *d) { for (i = 0; i < 16; i++) { DQ[i] = (int16_t)((Q[i] * DQC[i])); } -#ifdef DEC_DEBUG - if (dec_debug) { - int j; - printf("Dequantize 2x2\n"); - for (j = 0; j < 16; j++) printf("%d ", Q[j]); - printf("\n"); - for (j = 0; j < 16; j++) printf("%d ", DQ[j]); - printf("\n"); - } -#endif } void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, @@ -220,17 +206,6 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, int16_t *diff_ptr = output; int i; -#ifdef DEC_DEBUG - if (dec_debug) { - int j; - printf("Input 8x8\n"); - for (j = 0; j < 64; j++) { - printf("%d ", input[j]); - if (j % 8 == 7) printf("\n"); - } - } -#endif - /* If dc is 1, then input[0] is the reconstructed value, do not need * dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. 
*/ @@ -282,47 +257,13 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, for (i = 1; i < 64; i++) { input[i] = input[i] * dq[1]; } -#ifdef DEC_DEBUG - if (dec_debug) { - int j; - printf("Input DQ 8x8\n"); - for (j = 0; j < 64; j++) { - printf("%d ", input[j]); - if (j % 8 == 7) printf("\n"); - } - } -#endif - // the idct halves ( >> 1) the pitch vp9_short_idct8x8_c(input, output, 16); -#ifdef DEC_DEBUG - if (dec_debug) { - int j; - printf("Output 8x8\n"); - for (j = 0; j < 64; j++) { - printf("%d ", output[j]); - if (j % 8 == 7) printf("\n"); - } - } -#endif vpx_memset(input, 0, 128); add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); -#ifdef DEC_DEBUG - if (dec_debug) { - int k, j; - printf("Final 8x8\n"); - for (j = 0; j < 8; j++) { - for (k = 0; k < 8; k++) { - printf("%d ", origdest[k]); - } - printf("\n"); - origdest += stride; - } - } -#endif } } diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 1a7a08fa44ffeec6731e030e1b8ca54f85ac26cf..abc0c3a86bad64167511ec7cd007db64f3e49470 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -59,12 +59,11 @@ static const unsigned char cat6_prob[14] = { 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { - /* Clear entropy contexts for Y2 blocks */ + /* Clear entropy contexts */ if ((xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV) - || xd->mode_info_context->mbmi.txfm_size == TX_16X16 - ) { + xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV) + || xd->mode_info_context->mbmi.txfm_size == TX_16X16) { vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } else { @@ -309,10 +308,9 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, int c, i, 
eobtotal = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; + int has_2nd_order = get_2nd_order_usage(xd); // 2nd order DC block - if (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV && - xd->mode_info_context->mbmi.mode != I8X8_PRED) { + if (has_2nd_order) { ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[24]; ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[24]; @@ -325,6 +323,7 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, } else { xd->above_context->y2 = 1; xd->left_context->y2 = 1; + eobs[24] = 0; type = PLANE_TYPE_Y_WITH_DC; } @@ -336,7 +335,7 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, type, type == PLANE_TYPE_Y_WITH_DC ? - get_tx_type(xd, xd->block + i) : DCT_DCT, + get_tx_type(xd, xd->block + i) : DCT_DCT, seg_eob, xd->block[i].qcoeff, vp9_default_zig_zag1d_8x8, TX_8X8, vp9_coef_bands_8x8); @@ -392,7 +391,7 @@ int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, TX_TYPE tx_type = DCT_DCT; if (type == PLANE_TYPE_Y_WITH_DC) - tx_type = get_tx_type(xd, &xd->block[i]); + tx_type = get_tx_type_4x4(xd, &xd->block[i]); switch (tx_type) { case ADST_DCT : scan = vp9_row_scan; @@ -429,14 +428,15 @@ static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx, int i, eobtotal = 0; PLANE_TYPE type; - if (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV) { + int has_2nd_order = get_2nd_order_usage(xd); + + if (has_2nd_order) { eobtotal += vp9_decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24) - 16; type = PLANE_TYPE_Y_NO_DC; } else { xd->above_context->y2 = 1; xd->left_context->y2 = 1; + xd->eobs[24] = 0; type = PLANE_TYPE_Y_WITH_DC; } diff --git a/vp9/encoder/arm/vp9_quantize_arm.c b/vp9/encoder/arm/vp9_quantize_arm.c index f300871662437e9dfebd05a76f6ae89d40193c9b..aacaa529ceb647b9bbe2ba06a49974d4d46f6ad6 100644 --- 
a/vp9/encoder/arm/vp9_quantize_arm.c +++ b/vp9/encoder/arm/vp9_quantize_arm.c @@ -23,8 +23,7 @@ * the regular quantize_b function pointer */ void vp8_quantize_mby_neon(MACROBLOCK *x) { int i; - int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED - && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + int has_2nd_order = get_2nd_order_usage(&x->e_mbd); for (i = 0; i < 16; i += 2) x->quantize_b_pair(&x->block[i], &x->block[i + 1], @@ -36,8 +35,7 @@ void vp8_quantize_mb_neon(MACROBLOCK *x) { int i; - int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED - && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + int has_2nd_order = get_2nd_order_usage(&x->e_mbd); for (i = 0; i < 24; i += 2) x->quantize_b_pair(&x->block[i], &x->block[i + 1], diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 644f2082b9ab7234baf70fb7d1316222a4b348ac..c5c93214378c819e59f3363dcd878e17629cc38c 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -939,7 +939,6 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { #endif write_ymode(bc, mode, pc->fc.ymode_prob); } - if (mode == B_PRED) { int j = 0; #if CONFIG_COMP_INTRA_PRED diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 3789946304923892024626d9c19aa8d84b1e2f31..bd41461282a5d1b1308ea346827491e7aab302db 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -47,9 +47,9 @@ #define IF_RTCD(x) NULL #endif +// #define ENC_DEBUG #ifdef ENC_DEBUG int enc_debug = 0; -int mb_row_debug, mb_col_debug; #endif static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, @@ -557,7 +557,7 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_mvs[rf], &best_mv); mbmi->best_index = best_index; - if (mbmi->second_ref_frame > 0) { + if (mbmi->second_ref_frame > 0) { unsigned int best_index; best_index = pick_best_mv_ref(x, sec_ref_frame, mbmi->mv[1], @@ 
-764,6 +764,12 @@ static void pick_mb_modes(VP9_COMP *cpi, cpi->seg0_progress = (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols + i) << 16) / cm->MBs; } +#ifdef ENC_DEBUG + enc_debug = (cpi->common.current_video_frame == 73 && + mb_row == 4 && mb_col == 13); + if (enc_debug) + printf("pick_mb_modes %d %d\n", mb_row, mb_col); +#endif vp9_pick_mode_inter_macroblock(cpi, x, recon_yoffset, recon_uvoffset, &r, &d); *totalrate += r; @@ -1015,13 +1021,6 @@ static void encode_sb(VP9_COMP *cpi, xd->mb_index = i; -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 0 && - mb_row == 0 && mb_col == 0); - mb_col_debug = mb_col; - mb_row_debug = mb_row; -#endif - // Restore MB state to that when it was picked #if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.encoded_as_sb) { @@ -1424,7 +1423,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { TOKENEXTRA *tp = cpi->tok; int totalrate; - //printf("encode_frame_internal\n"); + // printf("encode_frame_internal frame %d (%d)\n", + // cpi->common.current_video_frame, cpi->common.show_frame); // Compute a modified set of reference frame probabilities to use when // prediction fails. These are based on the current general estimates for @@ -2033,6 +2033,12 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, assert(!xd->mode_info_context->mbmi.encoded_as_sb); #endif +#ifdef ENC_DEBUG + enc_debug = (cpi->common.current_video_frame == 73 && + mb_row == 4 && mb_col == 13); + if (enc_debug) + printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled); +#endif if (cm->frame_type == KEY_FRAME) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM && output_enabled) { // Adjust the zbin based on this MB rate. 
@@ -2076,6 +2082,12 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, } if (mbmi->ref_frame == INTRA_FRAME) { +#ifdef ENC_DEBUG + if (enc_debug) { + printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip, + mbmi->txfm_size); + } +#endif if (mbmi->mode == B_PRED) { vp9_encode_intra16x16mbuv(x); vp9_encode_intra4x4mby(x); @@ -2091,6 +2103,13 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, sum_intra_stats(cpi, x); } else { int ref_fb_idx; +#ifdef ENC_DEBUG + if (enc_debug) + printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d\n", + mbmi->mode, x->skip, mbmi->txfm_size, + mbmi->ref_frame, mbmi->second_ref_frame, + mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col); +#endif assert(cm->frame_type != KEY_FRAME); @@ -2131,40 +2150,88 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mb_skip_coeff = 0; } else { - vp9_build_1st_inter16x16_predictors_mb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, + vp9_build_1st_inter16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + vp9_build_2nd_inter16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride); + } +#if CONFIG_COMP_INTERINTRA_PRED + else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + vp9_build_interintra_16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride); + } +#endif } } if (!x->skip) { #ifdef ENC_DEBUG if (enc_debug) { - int i; - printf("Segment=%d [%d, %d]: %d %d:\n", mbmi->segment_id, mb_col_debug, - mb_row_debug, xd->mb_to_left_edge, xd->mb_to_top_edge); + int i, j; + printf("\n"); + printf("qcoeff\n"); for (i = 0; i < 400; i++) { printf("%3d ", xd->qcoeff[i]); if (i % 16 == 15) printf("\n"); } printf("\n"); - printf("eobs = "); - for (i = 0; i < 25; i++) - 
printf("%d:%d ", i, xd->block[i].eob); + printf("predictor\n"); + for (i = 0; i < 384; i++) { + printf("%3d ", xd->predictor[i]); + if (i % 16 == 15) printf("\n"); + } + printf("\n"); + printf("src_diff\n"); + for (i = 0; i < 384; i++) { + printf("%3d ", x->src_diff[i]); + if (i % 16 == 15) printf("\n"); + } + printf("\n"); + printf("diff\n"); + for (i = 0; i < 384; i++) { + printf("%3d ", xd->block[0].diff[i]); + if (i % 16 == 15) printf("\n"); + } + printf("\n"); + printf("final y\n"); + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j++) + printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]); + printf("\n"); + } + printf("\n"); + printf("final u\n"); + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) + printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]); + printf("\n"); + } printf("\n"); + printf("final v\n"); + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) + printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]); + printf("\n"); + } fflush(stdout); } #endif vp9_tokenize_mb(cpi, xd, t, !output_enabled); -#ifdef ENC_DEBUG - if (enc_debug) { - printf("Tokenized\n"); - fflush(stdout); - } -#endif } else { int mb_skip_context = cpi->common.mb_no_coeff_skip ? 
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 017c178b9030fc0af9e3f342397f516cbf7c2976..0f813d9b3ef8f1848fc9ebdf78412ea15a194a54 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -72,7 +72,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { vp9_subtract_b(be, b, 16); - tx_type = get_tx_type(&x->e_mbd, b); + tx_type = get_tx_type_4x4(&x->e_mbd, b); if (tx_type != DCT_DCT) { vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4); vp9_ht_quantize_b_4x4(be, b, tx_type); @@ -112,21 +112,11 @@ void vp9_encode_intra16x16mby(MACROBLOCK *x) { vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); if (tx_size == TX_16X16) { - BLOCKD *bd = &xd->block[0]; - tx_type = get_tx_type(xd, bd); - if (tx_type != DCT_DCT) { - vp9_fht(b->src_diff, 32, b->coeff, tx_type, 16); - vp9_quantize_mby_16x16(x); - if (x->optimize) - vp9_optimize_mby_16x16(x); - vp9_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16); - } else { - vp9_transform_mby_16x16(x); - vp9_quantize_mby_16x16(x); - if (x->optimize) - vp9_optimize_mby_16x16(x); - vp9_inverse_transform_mby_16x16(xd); - } + vp9_transform_mby_16x16(x); + vp9_quantize_mby_16x16(x); + if (x->optimize) + vp9_optimize_mby_16x16(x); + vp9_inverse_transform_mby_16x16(xd); } else if (tx_size == TX_8X8) { vp9_transform_mby_8x8(x); vp9_quantize_mby_8x8(x); @@ -196,14 +186,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { b->predictor); } #endif + // generate residual blocks + vp9_subtract_4b_c(be, b, 16); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { int idx = (ib & 0x02) ? 
(ib + 2) : ib; - // generate residual blocks - vp9_subtract_4b_c(be, b, 16); - - tx_type = get_tx_type(xd, xd->block + idx); + tx_type = get_tx_type_8x8(xd, &xd->block[ib]); if (tx_type != DCT_DCT) { vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8); @@ -219,10 +208,16 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { for (i = 0; i < 4; i++) { b = &xd->block[ib + iblock[i]]; be = &x->block[ib + iblock[i]]; - vp9_subtract_b(be, b, 16); - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, b); - vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); + vp9_ht_quantize_b_4x4(be, b, tx_type); + vp9_ihtllm_c(b->dqcoeff, b->diff, 32, tx_type, 4); + } else { + x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4(be, b); + vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); + } } } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 1a7a38261f3bd0fbe5e64fa4021d1e424c2c599d..68aa414479f6c630835f8f635882036123d557fd 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -128,24 +128,36 @@ static void build_dcblock_4x4(MACROBLOCK *x) { for (i = 0; i < 16; i++) { src_diff_ptr[i] = x->coeff[i * 16]; + x->coeff[i * 16] = 0; } } void vp9_transform_mby_4x4(MACROBLOCK *x) { int i; + MACROBLOCKD *xd = &x->e_mbd; + int has_2nd_order = get_2nd_order_usage(xd); - for (i = 0; i < 16; i += 2) { - x->vp9_short_fdct8x4(&x->block[i].src_diff[0], - &x->block[i].coeff[0], 32); + for (i = 0; i < 16; i++) { + BLOCK *b = &x->block[i]; + TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]); + if (tx_type != DCT_DCT) { + assert(has_2nd_order == 0); + vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4); + } else { + x->vp9_short_fdct4x4(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } } - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { + if (has_2nd_order) { // build dc 
block from 16 y dc values build_dcblock_4x4(x); // do 2nd order transform on the dc block x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); + } else { + vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0])); } } @@ -174,27 +186,50 @@ static void build_dcblock_8x8(MACROBLOCK *x) { src_diff_ptr[1] = x->coeff[4 * 16]; src_diff_ptr[4] = x->coeff[8 * 16]; src_diff_ptr[8] = x->coeff[12 * 16]; + x->coeff[0 * 16] = 0; + x->coeff[4 * 16] = 0; + x->coeff[8 * 16] = 0; + x->coeff[12 * 16] = 0; } void vp9_transform_mby_8x8(MACROBLOCK *x) { int i; + MACROBLOCKD *xd = &x->e_mbd; + TX_TYPE tx_type; + int has_2nd_order = get_2nd_order_usage(xd); for (i = 0; i < 9; i += 8) { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], - &x->block[i].coeff[0], 32); + BLOCK *b = &x->block[i]; + tx_type = get_tx_type_8x8(xd, &xd->block[i]); + if (tx_type != DCT_DCT) { + assert(has_2nd_order == 0); + vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8); + } else { + x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } } for (i = 2; i < 11; i += 8) { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], - &x->block[i + 2].coeff[0], 32); + BLOCK *b = &x->block[i]; + tx_type = get_tx_type_8x8(xd, &xd->block[i]); + if (tx_type != DCT_DCT) { + assert(has_2nd_order == 0); + vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8); + } else { + x->vp9_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i + 2].coeff[0], 32); + } } - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { + if (has_2nd_order) { // build dc block from 2x2 y dc values build_dcblock_8x8(x); // do 2nd order transform on the dc block x->short_fhaar2x2(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); + } else { + vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0])); } } @@ -213,9 +248,16 @@ void vp9_transform_mb_8x8(MACROBLOCK *x) { } void vp9_transform_mby_16x16(MACROBLOCK *x) { + MACROBLOCKD *xd = &x->e_mbd; + BLOCK *b = &x->block[0]; + TX_TYPE 
tx_type = get_tx_type_16x16(xd, &xd->block[0]); vp9_clear_system_state(); - x->vp9_short_fdct16x16(&x->block[0].src_diff[0], - &x->block[0].coeff[0], 32); + if (tx_type != DCT_DCT) { + vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16); + } else { + x->vp9_short_fdct16x16(&x->block[0].src_diff[0], + &x->block[0].coeff[0], 32); + } } void vp9_transform_mb_16x16(MACROBLOCK *x) { @@ -299,7 +341,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, // TODO: this isn't called (for intra4x4 modes), but will be left in // since it could be used later { - TX_TYPE tx_type = get_tx_type(&mb->e_mbd, d); + TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d); if (tx_type != DCT_DCT) { switch (tx_type) { case ADST_DCT: @@ -579,7 +621,8 @@ void vp9_optimize_mby_4x4(MACROBLOCK *x) { ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - has_2nd_order = (mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV); + has_2nd_order = get_2nd_order_usage(&x->e_mbd); + type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; for (b = 0; b < 16; b++) { @@ -628,7 +671,7 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV; + int has_2nd_order = get_2nd_order_usage(&x->e_mbd); if (!x->e_mbd.above_context || !x->e_mbd.left_context) return; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index d3cd8eb54940417f0329f98b68a00cbce93ff87c..f160edb565e112907345c9321e99b938e9de9587 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -136,13 +136,26 @@ void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) { void vp9_quantize_mby_4x4_c(MACROBLOCK *x) { int i; - int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV; + int has_2nd_order = get_2nd_order_usage(&x->e_mbd); - for (i = 0; i < 16; i++) - x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]); - - if (has_2nd_order) + for (i = 0; i < 16; i++) { + TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, &x->e_mbd.block[i]); + if (tx_type != DCT_DCT) { + assert(has_2nd_order == 0); + vp9_ht_quantize_b_4x4(&x->block[i], &x->e_mbd.block[i], tx_type); + } else { + x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]); + } + } + if (has_2nd_order) { x->quantize_b_4x4(&x->block[24], &x->e_mbd.block[24]); + } else { + vpx_memset(x->e_mbd.block[24].qcoeff, 0, + 16 * sizeof(x->e_mbd.block[24].qcoeff[0])); + vpx_memset(x->e_mbd.block[24].dqcoeff, 0, + 16 * sizeof(x->e_mbd.block[24].dqcoeff[0])); + x->e_mbd.block[24].eob = 0; + } } void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) { @@ -257,17 +270,29 @@ void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { void vp9_quantize_mby_8x8(MACROBLOCK *x) { int i; - int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV; + int has_2nd_order = get_2nd_order_usage(&x->e_mbd); for (i = 0; i < 16; i ++) { x->e_mbd.block[i].eob = 0; } x->e_mbd.block[24].eob = 0; - for (i = 0; i < 
16; i += 4) + for (i = 0; i < 16; i += 4) { + int ib = (i & 8) + ((i & 4) >> 1); + TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, &x->e_mbd.block[ib]); + if (tx_type != DCT_DCT) + assert(has_2nd_order == 0); x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); + } - if (has_2nd_order) + if (has_2nd_order) { x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); + } else { + vpx_memset(x->e_mbd.block[24].qcoeff, 0, + 16 * sizeof(x->e_mbd.block[24].qcoeff[0])); + vpx_memset(x->e_mbd.block[24].dqcoeff, 0, + 16 * sizeof(x->e_mbd.block[24].dqcoeff[0])); + x->e_mbd.block[24].eob = 0; + } } void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 760593acb89992cfb705b0d0a147f4571917596c..22a6d9a695c0d20aefb432063c2967d81219828c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -432,6 +432,25 @@ int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) { return error; } +int vp9_mbblock_error_8x8_c(MACROBLOCK *mb, int dc) { + BLOCK *be; + BLOCKD *bd; + int i, j; + int berror, error = 0; + + for (i = 0; i < 16; i+=4) { + be = &mb->block[i]; + bd = &mb->e_mbd.block[i]; + berror = 0; + for (j = dc; j < 64; j++) { + int this_diff = be->coeff[j] - bd->dqcoeff[j]; + berror += this_diff * this_diff; + } + error += berror; + } + return error; +} + int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) { BLOCK *be; BLOCKD *bd; @@ -441,17 +460,13 @@ int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) { for (i = 0; i < 16; i++) { be = &mb->block[i]; bd = &mb->e_mbd.block[i]; - berror = 0; - for (j = dc; j < 16; j++) { int this_diff = be->coeff[j] - bd->dqcoeff[j]; berror += this_diff * this_diff; } - error += berror; } - return error; } @@ -645,7 +660,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, return cost; } -static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { +static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) { int cost = 0; int b; MACROBLOCKD *xd = 
&mb->e_mbd; @@ -665,13 +680,16 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { } for (b = 0; b < 16; b++) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_NO_DC, + cost += cost_coeffs(mb, xd->block + b, + (has_2nd_order ? + PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC), ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4); - cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[24], tl + vp9_block2left[24], - TX_4X4); + if (has_2nd_order) + cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2, + ta + vp9_block2above[24], tl + vp9_block2left[24], + TX_4X4); return cost; } @@ -686,38 +704,24 @@ static void macro_block_yrd_4x4(MACROBLOCK *mb, BLOCKD *const x_y2 = xd->block + 24; short *Y2DCPtr = mb_y2->src_diff; BLOCK *beptr; - int d; + int d, i, has_2nd_order; + xd->mode_info_context->mbmi.txfm_size = TX_4X4; + has_2nd_order = get_2nd_order_usage(xd); // Fdct and building the 2nd order block - for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) { - mb->vp9_short_fdct8x4(beptr->src_diff, beptr->coeff, 32); - *Y2DCPtr++ = beptr->coeff[0]; - *Y2DCPtr++ = beptr->coeff[16]; - } - - // 2nd order fdct - mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); - - // Quantization - for (b = 0; b < 16; b++) { - mb->quantize_b_4x4(&mb->block[b], &xd->block[b]); - } - - // DC predication and Quantization of 2nd Order block - mb->quantize_b_4x4(mb_y2, x_y2); - - // Distortion - d = vp9_mbblock_error(mb, 1); - - d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); + vp9_transform_mby_4x4(mb); + vp9_quantize_mby_4x4(mb); + d = vp9_mbblock_error(mb, has_2nd_order); + if (has_2nd_order) + d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); *Distortion = (d >> 2); // rate - *Rate = rdcost_mby_4x4(mb, backup); - *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, 1); + *Rate = rdcost_mby_4x4(mb, has_2nd_order, backup); + *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, has_2nd_order); } -static int rdcost_mby_8x8(MACROBLOCK *mb, int 
backup) { +static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) { int cost = 0; int b; MACROBLOCKD *xd = &mb->e_mbd; @@ -737,12 +741,15 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { } for (b = 0; b < 16; b += 4) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_NO_DC, + cost += cost_coeffs(mb, xd->block + b, + (has_2nd_order ? + PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC), ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b], TX_8X8); - cost += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[24], tl + vp9_block2left[24]); + if (has_2nd_order) + cost += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2, + ta + vp9_block2above[24], tl + vp9_block2left[24]); return cost; } @@ -753,28 +760,21 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, MACROBLOCKD *const xd = &mb->e_mbd; BLOCK *const mb_y2 = mb->block + 24; BLOCKD *const x_y2 = xd->block + 24; - int d; + int d, has_2nd_order; + + xd->mode_info_context->mbmi.txfm_size = TX_8X8; vp9_transform_mby_8x8(mb); vp9_quantize_mby_8x8(mb); - - /* remove 1st order dc to properly combine 1st/2nd order distortion */ - mb->coeff[0] = 0; - mb->coeff[64] = 0; - mb->coeff[128] = 0; - mb->coeff[192] = 0; - xd->dqcoeff[0] = 0; - xd->dqcoeff[64] = 0; - xd->dqcoeff[128] = 0; - xd->dqcoeff[192] = 0; - - d = vp9_mbblock_error(mb, 0); - d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); + has_2nd_order = get_2nd_order_usage(xd); + d = vp9_mbblock_error_8x8_c(mb, has_2nd_order); + if (has_2nd_order) + d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); *Distortion = (d >> 2); // rate - *Rate = rdcost_mby_8x8(mb, backup); - *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, 1); + *Rate = rdcost_mby_8x8(mb, has_2nd_order, backup); + *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, has_2nd_order); } static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) { @@ -806,12 +806,8 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, BLOCK *be = 
&mb->block[0]; TX_TYPE tx_type; - tx_type = get_tx_type_16x16(xd, b); - if (tx_type != DCT_DCT) { - vp9_fht(be->src_diff, 32, be->coeff, tx_type, 16); - } else - vp9_transform_mby_16x16(mb); - + xd->mode_info_context->mbmi.txfm_size = TX_16X16; + vp9_transform_mby_16x16(mb); vp9_quantize_mby_16x16(mb); // TODO(jingning) is it possible to quickly determine whether to force // trailing coefficients to be zero, instead of running trellis @@ -1379,7 +1375,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, #endif MACROBLOCKD *xd = &x->e_mbd; int64_t best_rd = INT64_MAX; - int distortion, rate = 0; + int distortion = 0, rate = 0; BLOCK *be = x->block + ib; BLOCKD *b = xd->block + ib; ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0; @@ -1402,7 +1398,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { #endif int64_t this_rd; - int rate_t; + int rate_t = 0; // FIXME rate for compound mode and second intrapred mode rate = mode_costs[mode]; @@ -1421,6 +1417,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_subtract_4b_c(be, b, 16); + assert(get_2nd_order_usage(xd) == 0); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { TX_TYPE tx_type = get_tx_type_8x8(xd, b); if (tx_type != DCT_DCT) @@ -1442,35 +1439,32 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ta1 = ta0; tl1 = tl0; } else { - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); - x->vp9_short_fdct8x4((be + 4)->src_diff, (be + 4)->coeff, 32); - - x->quantize_b_4x4_pair(x->block + ib, x->block + ib + 1, - xd->block + ib, xd->block + ib + 1); - x->quantize_b_4x4_pair(x->block + ib + 4, x->block + ib + 5, - xd->block + ib + 4, xd->block + ib + 5); - - distortion = vp9_block_error_c((x->block + ib)->coeff, - (xd->block + ib)->dqcoeff, 16); - distortion += vp9_block_error_c((x->block + ib + 1)->coeff, - (xd->block + ib + 1)->dqcoeff, 16); - 
distortion += vp9_block_error_c((x->block + ib + 4)->coeff, - (xd->block + ib + 4)->dqcoeff, 16); - distortion += vp9_block_error_c((x->block + ib + 5)->coeff, - (xd->block + ib + 5)->dqcoeff, 16); - + static const int iblock[4] = {0, 1, 4, 5}; + TX_TYPE tx_type; + int i; ta0 = a[vp9_block2above[ib]]; ta1 = a[vp9_block2above[ib + 1]]; tl0 = l[vp9_block2left[ib]]; tl1 = l[vp9_block2left[ib + 4]]; - rate_t = cost_coeffs(x, xd->block + ib, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl0, TX_4X4); - rate_t += cost_coeffs(x, xd->block + ib + 1, PLANE_TYPE_Y_WITH_DC, - &ta1, &tl0, TX_4X4); - rate_t += cost_coeffs(x, xd->block + ib + 4, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl1, TX_4X4); - rate_t += cost_coeffs(x, xd->block + ib + 5, PLANE_TYPE_Y_WITH_DC, - &ta1, &tl1, TX_4X4); + distortion = 0; + rate_t = 0; + for (i = 0; i < 4; ++i) { + b = &xd->block[ib + iblock[i]]; + be = &x->block[ib + iblock[i]]; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); + vp9_ht_quantize_b_4x4(be, b, tx_type); + } else { + x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4(be, b); + } + distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16); + rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, + // i&1 ? &ta1 : &ta0, i&2 ? 
&tl1 : &tl0, + &ta0, &tl0, + TX_4X4); + } rate += rate_t; } @@ -2158,17 +2152,17 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, } else /* 8x8 */ { if (otherrd) { for (j = 0; j < 4; j += 2) { - BLOCKD *bd3 = &xd->block[ib + iblock[j]]; - BLOCK *be3 = &x->block[ib + iblock[j]]; - x->vp9_short_fdct8x4(be3->src_diff, be3->coeff, 32); - x->quantize_b_4x4_pair(be3, be3 + 1, bd3, bd3 + 1); - thisdistortion = vp9_block_error_c(be3->coeff, bd3->dqcoeff, 32); + BLOCKD *bd = &xd->block[ib + iblock[j]]; + BLOCK *be = &x->block[ib + iblock[j]]; + x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); + thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); otherdist += thisdistortion; - othercost += cost_coeffs(x, bd3, PLANE_TYPE_Y_WITH_DC, + othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[ib + iblock[j]], tlcp + vp9_block2left[ib + iblock[j]], TX_4X4); - othercost += cost_coeffs(x, bd3 + 1, PLANE_TYPE_Y_WITH_DC, + othercost += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[ib + iblock[j] + 1], tlcp + vp9_block2left[ib + iblock[j]], TX_4X4); @@ -4445,8 +4439,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame; - assert(ref_frame == INTRA_FRAME || - (cpi->ref_frame_flags & flag_list[ref_frame])); + if (!(ref_frame == INTRA_FRAME || + (cpi->ref_frame_flags & flag_list[ref_frame]))) { + continue; + } mbmi->ref_frame = ref_frame; comp_pred = vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME; mbmi->mode = this_mode; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 41b7f0edfd6a88590e1375dc671acb88fdee3667..292b6be961ec8cc5d80a06f3564b1b5495a759e8 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -215,11 +215,11 @@ static void tokenize_b(VP9_COMP *cpi, *a = *l = (c != !type); /* 0 <-> all coeff data 
is zero */ } -int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block) { +int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) { int skip = 1; int i = 0; - if (has_y2_block) { + if (has_2nd_order) { for (i = 0; i < 16; i++) skip &= (xd->block[i].eob < 2); skip &= (!xd->block[24].eob); @@ -239,16 +239,16 @@ int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) { return skip; } -static int mb_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block) { - return (vp9_mby_is_skippable_4x4(xd, has_y2_block) & +static int mb_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) { + return (vp9_mby_is_skippable_4x4(xd, has_2nd_order) & vp9_mbuv_is_skippable_4x4(xd)); } -int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block) { +int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) { int skip = 1; int i = 0; - if (has_y2_block) { + if (has_2nd_order) { for (i = 0; i < 16; i += 4) skip &= (xd->block[i].eob < 2); skip &= (!xd->block[24].eob); @@ -263,13 +263,13 @@ int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) { return (!xd->block[16].eob) & (!xd->block[20].eob); } -static int mb_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block) { - return (vp9_mby_is_skippable_8x8(xd, has_y2_block) & +static int mb_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) { + return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) & vp9_mbuv_is_skippable_8x8(xd)); } -static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd, int has_y2_block) { - return (vp9_mby_is_skippable_8x8(xd, has_y2_block) & +static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd, int has_2nd_order) { + return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) & vp9_mbuv_is_skippable_4x4(xd)); } @@ -288,7 +288,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run) { PLANE_TYPE plane_type; - int has_y2_block; + int has_2nd_order; int b; int tx_size = xd->mode_info_context->mbmi.txfm_size; int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP); @@ -308,10 +308,7 @@ void 
vp9_tokenize_mb(VP9_COMP *cpi, } else skip_inc = 0; - has_y2_block = (tx_size != TX_16X16 - && xd->mode_info_context->mbmi.mode != B_PRED - && xd->mode_info_context->mbmi.mode != I8X8_PRED - && xd->mode_info_context->mbmi.mode != SPLITMV); + has_2nd_order = get_2nd_order_usage(xd); switch (tx_size) { case TX_16X16: @@ -320,13 +317,16 @@ void vp9_tokenize_mb(VP9_COMP *cpi, case TX_8X8: if (xd->mode_info_context->mbmi.mode == I8X8_PRED || xd->mode_info_context->mbmi.mode == SPLITMV) - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8_4x4uv(xd, 0); + xd->mode_info_context->mbmi.mb_skip_coeff = + mb_is_skippable_8x8_4x4uv(xd, 0); else - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8(xd, has_y2_block); + xd->mode_info_context->mbmi.mb_skip_coeff = + mb_is_skippable_8x8(xd, has_2nd_order); break; default: - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_4x4(xd, has_y2_block); + xd->mode_info_context->mbmi.mb_skip_coeff = + mb_is_skippable_4x4(xd, has_2nd_order); break; } @@ -346,7 +346,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, if (!dry_run) cpi->skip_false_count[mb_skip_context] += skip_inc; - if (has_y2_block) { + if (has_2nd_order) { if (tx_size == TX_8X8) { tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], @@ -736,11 +736,9 @@ static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; PLANE_TYPE plane_type; int b; - const int has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV); + int has_2nd_order = get_2nd_order_usage(xd); - if (has_y2_block) { + if (has_2nd_order) { stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], TX_8X8, dry_run); @@ -792,11 +790,13 @@ static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd, ENTROPY_CONTEXT *L = 
(ENTROPY_CONTEXT *)xd->left_context; int b; PLANE_TYPE plane_type; - const int has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV); + int has_2nd_order = (xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV); + if (has_2nd_order && get_tx_type(xd, &xd->block[0]) != DCT_DCT) + has_2nd_order = 0; - if (has_y2_block) { + if (has_2nd_order) { stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, A + vp9_block2above[24], L + vp9_block2left[24], TX_4X4, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; @@ -819,10 +819,21 @@ static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; + PLANE_TYPE plane_type; int b; + int has_2nd_order = get_2nd_order_usage(xd); + if (has_2nd_order) { + stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, + A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], + TX_8X8, dry_run); + plane_type = PLANE_TYPE_Y_NO_DC; + } else { + plane_type = PLANE_TYPE_Y_WITH_DC; + } + for (b = 0; b < 16; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_Y_WITH_DC, + stuff_b(cpi, xd, xd->block + b, t, plane_type, A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], TX_8X8, dry_run); A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; @@ -859,10 +870,10 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } void vp9_fix_contexts(MACROBLOCKD *xd) { - /* Clear entropy contexts for Y2 blocks */ + /* Clear entropy contexts for blocks */ if ((xd->mode_info_context->mbmi.mode != B_PRED - && xd->mode_info_context->mbmi.mode != I8X8_PRED - && xd->mode_info_context->mbmi.mode != SPLITMV) + && xd->mode_info_context->mbmi.mode != I8X8_PRED + && xd->mode_info_context->mbmi.mode != SPLITMV) 
|| xd->mode_info_context->mbmi.txfm_size == TX_16X16 ) { vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); diff --git a/vpxenc.c b/vpxenc.c index 83400b9c588c798bd076f5a634f2fb4f17b82d6e..af6db91081bc3eb982ff6106abaa67a8dcb402d3 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -1444,6 +1444,72 @@ static void show_rate_histogram(struct rate_hist *hist, show_histogram(hist->bucket, buckets, hist->total, scale); } +#define mmin(a, b) ((a) < (b) ? (a) : (b)) +static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2, + int yloc[2], int uloc[2], int vloc[2]) { + int match = 1; + int i, j; + yloc[0] = yloc[1] = -1; + for (i = 0, match = 1; match && i < img1->d_h; i+=32) { + for (j = 0; match && j < img1->d_w; j+=32) { + int k, l; + int si = mmin(i + 32, img1->d_h) - i; + int sj = mmin(j + 32, img1->d_w) - j; + for (k = 0; match && k < si; k++) + for (l = 0; match && l < sj; l++) { + if (*(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != + *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { + yloc[0] = i + k; + yloc[1] = j + l; + match = 0; + break; + } + } + } + } + uloc[0] = uloc[1] = -1; + for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i+=16) { + for (j = 0; j < match && (img1->d_w + 1) / 2; j+=16) { + int k, l; + int si = mmin(i + 16, (img1->d_h + 1) / 2) - i; + int sj = mmin(j + 16, (img1->d_w + 1) / 2) - j; + for (k = 0; match && k < si; k++) + for (l = 0; match && l < sj; l++) { + if (*(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l) != + *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { + uloc[0] = i + k; + uloc[1] = j + l; + match = 0; + break; + } + } + } + } + vloc[0] = vloc[1] = -1; + for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i+=16) { + for (j = 0; j < match && (img1->d_w + 1) / 2; j+=16) { + int k, l; + int si = mmin(i + 16, (img1->d_h + 1) / 2) - i; + int sj = mmin(j + 16, (img1->d_w + 1) / 2) - j; + for (k = 
0; match && k < si; k++) + for (l = 0; match && l < sj; l++) { + if (*(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l) != + *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { + vloc[0] = i + k; + vloc[1] = j + l; + match = 0; + break; + } + } + } + } +} static int compare_img(vpx_image_t *img1, vpx_image_t *img2) { @@ -2283,8 +2349,13 @@ static void test_decode(struct stream_state *stream) { if (!stream->mismatch_seen && !compare_img(&stream->ref_enc.img, &stream->ref_dec.img)) { /* TODO(jkoleszar): make fatal. */ - warn("Stream %d: Encode/decode mismatch on frame %d", - stream->index, stream->frames_out); + int y[2], u[2], v[2]; + find_mismatch(&stream->ref_enc.img, &stream->ref_dec.img, + y, u, v); + warn("Stream %d: Encode/decode mismatch on frame %d" + " at Y[%d, %d], U[%d, %d], V[%d, %d]", + stream->index, stream->frames_out, + y[0], y[1], u[0], u[1], v[0], v[1]); stream->mismatch_seen = stream->frames_out; } }