diff --git a/vp9/decoder/decodemv.c b/vp9/decoder/decodemv.c index 5013462a3d38f8902fcdf7fcbed409ecb289b6c2..a22df8f3c38b1ba96c0742b384db223b83c36b7b 100644 --- a/vp9/decoder/decodemv.c +++ b/vp9/decoder/decodemv.c @@ -186,11 +186,6 @@ static void kfread_modes(VP9D_COMP *pbi, m->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); #endif -#if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) - m->mbmi.txfm_size = TX_8X8; - else -#endif if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 && m->mbmi.mode <= I8X8_PRED) { // FIXME(rbultje) code ternary symbol once all experiments are merged @@ -1132,11 +1127,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, #endif } -#if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) - mbmi->txfm_size = TX_8X8; - else -#endif if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) || (mbmi->ref_frame != INTRA_FRAME && !(mbmi->mode == SPLITMV && diff --git a/vp9/decoder/decodframe.c b/vp9/decoder/decodframe.c index 60bf7bacab1ae9b96ca603077c04d0aec9876326..562b55e8f112be9005e331cb0de8512b3d15dd88 100644 --- a/vp9/decoder/decodframe.c +++ b/vp9/decoder/decodframe.c @@ -205,6 +205,146 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { } } +#if CONFIG_SUPERBLOCKS +static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, unsigned int mb_col, + BOOL_DECODER* const bc) { + int i, n, eobtotal; + TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; + VP9_COMMON *const pc = &pbi->common; + MODE_INFO *orig_mi = xd->mode_info_context; + + assert(xd->mode_info_context->mbmi.encoded_as_sb); + + // re-initialize macroblock dequantizer before detokenization + if (xd->segmentation_enabled) + mb_init_dequantizer(pbi, xd); + + if (pbi->common.frame_type != KEY_FRAME) + vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc); + + if (xd->mode_info_context->mbmi.mb_skip_coeff) { + vp9_reset_mb_tokens_context(xd); + if (mb_col < pc->mb_cols - 1) + xd->above_context++; + if (mb_row < pc->mb_rows - 1) + xd->left_context++; + vp9_reset_mb_tokens_context(xd); + if (mb_col < pc->mb_cols - 1) + xd->above_context--; + if (mb_row < pc->mb_rows - 1) + xd->left_context--; + + /* Special case: Force the loopfilter to skip when eobtotal and + * mb_skip_coeff are zero. + */ + skip_recon_mb(pbi, xd); + return; + } + + /* do prediction */ + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + vp9_build_intra_predictors_sby_s(xd); + vp9_build_intra_predictors_sbuv_s(xd); + } else { + vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } + + /* dequantization and idct */ + for (n = 0; n < 4; n++) { + BLOCKD *b = &xd->block[24]; + int x_idx = n & 1, y_idx = n >> 1; + + if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows) + continue; + + xd->above_context = pc->above_context + mb_col + x_idx; + xd->left_context = pc->left_context + y_idx; + xd->mode_info_context = orig_mi + x_idx + y_idx * pc->mode_info_stride; + for (i = 0; i < 25; i++) { + xd->block[i].eob = 0; + xd->eobs[i] = 0; + } + + if (tx_size == TX_16X16) { + eobtotal = vp9_decode_mb_tokens_16x16(pbi, xd, bc); + } else if (tx_size == TX_8X8) { + eobtotal = vp9_decode_mb_tokens_8x8(pbi, xd, bc); + } else { + eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); + } + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + continue; + } + + if (tx_size == TX_16X16) { + vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->dst.y_stride); + vp9_dequant_idct_add_uv_block_8x8_inplace_c(xd->qcoeff + 16 * 16, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); + } else if (tx_size == TX_8X8) { + vp9_dequantize_b_2x2(b); + IDCT_INVOKE(RTCD_VTABLE(idct), ihaar2)(&b->dqcoeff[0], b->diff, 8); + ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(xd->qcoeff, + xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + vp9_dequant_idct_add_uv_block_8x8_inplace_c(xd->qcoeff + 16 * 16, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); + } else { + vp9_dequantize_b(b); + if (xd->eobs[24] > 1) { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + } else { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + } + + vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(xd->qcoeff, + xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + vp9_dequant_idct_add_uv_block_4x4_inplace_c(xd->qcoeff + 16 * 16, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); + } + } + + xd->above_context = pc->above_context + mb_col; + xd->left_context = pc->left_context; + xd->mode_info_context = orig_mi; +} +#endif + static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, unsigned int mb_col, BOOL_DECODER* const bc) { @@ -213,9 +353,9 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, int i; int tx_size; TX_TYPE tx_type; - VP9_COMMON *pc = &pbi->common; + #if CONFIG_SUPERBLOCKS - int orig_skip_flag = xd->mode_info_context->mbmi.mb_skip_coeff; + assert(!xd->mode_info_context->mbmi.encoded_as_sb); #endif // re-initialize macroblock dequantizer before detokenization @@ -227,20 +367,6 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, if (xd->mode_info_context->mbmi.mb_skip_coeff) { vp9_reset_mb_tokens_context(xd); -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb && - (mb_col < pc->mb_cols - 1 || mb_row < pc->mb_rows - 1)) { - if (mb_col < pc->mb_cols - 1) - xd->above_context++; - if (mb_row < pc->mb_rows - 1) - xd->left_context++; - vp9_reset_mb_tokens_context(xd); - if (mb_col < pc->mb_cols - 1) - xd->above_context--; - if (mb_row < pc->mb_rows - 1) - xd->left_context--; - } -#endif } else if (!bool_error(bc)) { for (i = 0; i < 25; i++) { xd->block[i].eob = 0; @@ -267,14 +393,8 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, * mb_skip_coeff are zero. * */ xd->mode_info_context->mbmi.mb_skip_coeff = 1; - -#if CONFIG_SUPERBLOCKS - if (!xd->mode_info_context->mbmi.encoded_as_sb || orig_skip_flag) -#endif - { - skip_recon_mb(pbi, xd); - return; - } + skip_recon_mb(pbi, xd); + return; } // moved to be performed before detokenization @@ -283,12 +403,6 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, /* do prediction */ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - vp9_build_intra_predictors_sby_s(xd); - vp9_build_intra_predictors_sbuv_s(xd); - } else -#endif if (mode != I8X8_PRED) { vp9_build_intra_predictors_mbuv(xd); if (mode != B_PRED) { @@ -296,13 +410,6 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, } } } else { -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); - } else -#endif vp9_build_inter_predictors_mb(xd); } @@ -404,40 +511,9 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, 16, xd->dst.y_stride); } } else if (tx_size == TX_8X8) { -#if CONFIG_SUPERBLOCKS - void *orig = xd->mode_info_context; - int n, num = xd->mode_info_context->mbmi.encoded_as_sb ? 4 : 1; - for (n = 0; n < num; n++) { - int x_idx = n & 1, y_idx = n >> 1; - if (num == 4 && (mb_col + x_idx >= pc->mb_cols || - mb_row + y_idx >= pc->mb_rows)) - continue; - - if (n != 0) { - for (i = 0; i < 25; i++) { - xd->block[i].eob = 0; - xd->eobs[i] = 0; - } - xd->above_context = pc->above_context + mb_col + (n & 1); - xd->left_context = pc->left_context + (n >> 1); - xd->mode_info_context = orig; - xd->mode_info_context += (n & 1); - xd->mode_info_context += (n >> 1) * pc->mode_info_stride; - if (!orig_skip_flag) { - eobtotal = vp9_decode_mb_tokens_8x8(pbi, xd, bc); - if (eobtotal == 0) // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - } else { - vp9_reset_mb_tokens_context(xd); - } - } - - if (xd->mode_info_context->mbmi.mb_skip_coeff) - continue; // only happens for SBs, which are already in dest buffer -#endif vp9_dequantize_b_2x2(b); IDCT_INVOKE(RTCD_VTABLE(idct), ihaar2)(&b->dqcoeff[0], b->diff, 8); - ((int *)b->qcoeff)[0] = 0;// 2nd order block are set to 0 after inverse transform + ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; ((int *)b->qcoeff)[3] = 0; @@ -445,27 +521,9 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[5] = 0; ((int *)b->qcoeff)[6] = 0; ((int *)b->qcoeff)[7] = 0; -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(xd->qcoeff, - xd->block[0].dequant, - xd->dst.y_buffer + (n >> 1) * 16 * xd->dst.y_stride + (n & 1) * 16, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); - // do UV inline also - vp9_dequant_idct_add_uv_block_8x8_inplace_c(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->dst.u_buffer + (n >> 1) * 8 * xd->dst.uv_stride + (n & 1) * 8, - xd->dst.v_buffer + (n >> 1) * 8 * xd->dst.uv_stride + (n & 1) * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); - } else -#endif vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); -#if CONFIG_SUPERBLOCKS - } - xd->mode_info_context = orig; -#endif } else { vp9_dequantize_b(b); if (xd->eobs[24] > 1) { @@ -489,25 +547,19 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, } } -#if CONFIG_SUPERBLOCKS - if (!xd->mode_info_context->mbmi.encoded_as_sb) { -#endif - if ((tx_size == TX_8X8 && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV) - || tx_size == TX_16X16 - ) - vp9_dequant_idct_add_uv_block_8x8 - (xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16, xd); // - else if (xd->mode_info_context->mbmi.mode != I8X8_PRED) - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs + 16); -#if CONFIG_SUPERBLOCKS - } -#endif + if ((tx_size == TX_8X8 && + xd->mode_info_context->mbmi.mode != I8X8_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV) + || tx_size == TX_16X16 + ) + vp9_dequant_idct_add_uv_block_8x8 + (xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 16, xd); + else if (xd->mode_info_context->mbmi.mode != I8X8_PRED) + pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 16); } @@ -661,9 +713,15 @@ decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd, mi[pc->mode_info_stride + 1] = mi[0]; } } + if (xd->mode_info_context->mbmi.encoded_as_sb) { + decode_superblock(pbi, xd, mb_row, mb_col, bc); + } else { +#endif + vp9_intra_prediction_down_copy(xd); + decode_macroblock(pbi, xd, mb_row, mb_col, bc); +#if CONFIG_SUPERBLOCKS + } #endif - vp9_intra_prediction_down_copy(xd); - decode_macroblock(pbi, xd, mb_row, mb_col, bc); /* check if the boolean decoder has suffered an error */ xd->corrupted |= bool_error(bc); diff --git a/vp9/decoder/dequantize.h b/vp9/decoder/dequantize.h index 560c4a417b4f34e30617f4187757d510a36e2c18..026bd2af61589cd3a044b6a3531ed5a45a36096b 100644 --- a/vp9/decoder/dequantize.h +++ b/vp9/decoder/dequantize.h @@ -73,12 +73,24 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq, int stride, unsigned short *eobs, short *dc, MACROBLOCKD *xd); + +void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, short *dq, + unsigned char *dst, + int stride, char *eobs, + short *dc, MACROBLOCKD *xd); + void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs, MACROBLOCKD *xd); + +void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, short *dq, + unsigned char *dstu, + unsigned char *dstv, + int stride, char *eobs, + MACROBLOCKD *xd); #endif #endif diff --git a/vp9/decoder/idct_blk.c b/vp9/decoder/idct_blk.c index d9fbf97c22361356fac00f3a22f0b10148dda26e..efe451e6cc45c6dc3f6dff9f535a2fb66cae7e49 100644 --- a/vp9/decoder/idct_blk.c +++ b/vp9/decoder/idct_blk.c @@ -36,6 +36,30 @@ void vp9_dequant_dc_idct_add_y_block_c(short *q, short *dq, } } +#if CONFIG_SUPERBLOCKS +void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, short *dq, + unsigned char *dst, + int stride, char *eobs, + short *dc, MACROBLOCKD *xd) { + int i, j; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + if (*eobs++ > 1) + vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]); + else + vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride); + + q += 16; + dst += 4; + dc++; + } + + dst += 4 * stride - 16; + } +} +#endif + void vp9_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *pre, unsigned char *dst, @@ -103,6 +127,47 @@ void vp9_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *pre, } } +#if CONFIG_SUPERBLOCKS +void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, short *dq, + unsigned char *dstu, + unsigned char *dstv, + int stride, char *eobs, + MACROBLOCKD *xd) { + int i, j; + + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + if (*eobs++ > 1) { + vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride); + } else { + vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride); + ((int *)q)[0] = 0; + } + + q += 16; + dstu += 4; + } + + dstu += 4 * stride - 8; + } + + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + if (*eobs++ > 1) { + vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride); + } else { + vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride); + ((int *)q)[0] = 0; + } + + q += 16; + dstv += 4; + } + + dstv += 4 * stride - 8; + } +} +#endif void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, short *dq, unsigned char *pre, diff --git a/vp9/encoder/bitstream.c b/vp9/encoder/bitstream.c index b7bc99cb30dca1a166402d4c97cf8b0a7217b765..a25783a2607d1ba3a866ffcad90947674d01d715 100644 --- a/vp9/encoder/bitstream.c +++ b/vp9/encoder/bitstream.c @@ -919,7 +919,7 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { MB_MODE_INFO *mi; MV_REFERENCE_FRAME rf; MB_PREDICTION_MODE mode; - int segment_id; + int segment_id, skip_coeff; int dy = row_delta[i]; int dx = col_delta[i]; @@ -973,10 +973,11 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { } } + skip_coeff = 1; if (pc->mb_no_coeff_skip && (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) { - int skip_coeff = mi->mb_skip_coeff; + skip_coeff = mi->mb_skip_coeff; #if CONFIG_SUPERBLOCKS if (mi->encoded_as_sb) { skip_coeff &= m[1].mbmi.mb_skip_coeff; @@ -1107,6 +1108,7 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { cpi->common.mcomp_filter_type); } } + if (mi->second_ref_frame && (mode == NEWMV || mode == SPLITMV)) { int_mv n1, n2; @@ -1244,15 +1246,11 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { } } - if ( -#if CONFIG_SUPERBLOCKS - !mi->encoded_as_sb && -#endif - ((rf == INTRA_FRAME && mode <= I8X8_PRED) || + if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || (rf != INTRA_FRAME && !(mode == SPLITMV && mi->partitioning == PARTITIONING_4X4))) && pc->txfm_mode == TX_MODE_SELECT && - !((pc->mb_no_coeff_skip && mi->mb_skip_coeff) || + !((pc->mb_no_coeff_skip && skip_coeff) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { TX_SIZE sz = mi->txfm_size; @@ -1389,11 +1387,7 @@ static void write_mb_modes_kf(const VP9_COMMON *c, } else write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); - if ( -#if CONFIG_SUPERBLOCKS - !m->mbmi.encoded_as_sb && -#endif - ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && + if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && !((c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { diff --git a/vp9/encoder/encodeframe.c b/vp9/encoder/encodeframe.c index 703b49e35a047270eccf0bdecb750424a01a5fc6..21def264f2dac25def04bbcd891201ef723f7b30 100644 --- a/vp9/encoder/encodeframe.c +++ b/vp9/encoder/encodeframe.c @@ -55,7 +55,8 @@ int mb_row_debug, mb_col_debug; static void encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int output_enabled); + int recon_uvoffset, int output_enabled, + int mb_col, int mb_row); static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, @@ -65,7 +66,7 @@ static void encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int output_enabled); static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int mb_col); + TOKENEXTRA **t, int mb_col, int mb_row); static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); @@ -466,9 +467,9 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { cpi->prediction_error += ctx->distortion; cpi->intra_error += ctx->intra_error; - cpi->rd_comp_pred_diff[0] += ctx->single_pred_diff; - cpi->rd_comp_pred_diff[1] += ctx->comp_pred_diff; - cpi->rd_comp_pred_diff[2] += ctx->hybrid_pred_diff; + cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff; + cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff; + cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff; } } @@ -645,7 +646,7 @@ static void pick_mb_modes(VP9_COMP *cpi, // Dummy encode, do not do the tokenization encode_inter_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 0); + recon_yoffset, recon_uvoffset, 0, mb_col, mb_row); seg_id = mbmi->segment_id; if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) { @@ -975,7 +976,7 @@ static void encode_sb(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { #if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.encoded_as_sb) - encode_intra_super_block(cpi, x, tp, mb_col); + encode_intra_super_block(cpi, x, tp, mb_col, mb_row); else #endif encode_intra_macro_block(cpi, x, tp, 1); @@ -1005,8 +1006,8 @@ static void encode_sb(VP9_COMP *cpi, mb_col, mb_row); else #endif - encode_inter_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 1); + encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset, 1, + mb_col, mb_row); // Note the encoder may have changed the segment_id #ifdef MODE_STATS @@ -1431,7 +1432,7 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { VP9_COMMON *cm = &cpi->common; - int mb_row, mb_col, mis = cm->mode_info_stride, segment_id; + int mb_row, mb_col, mis = cm->mode_info_stride, segment_id, skip; MODE_INFO *mi, *mi_ptr = cm->mi; #if CONFIG_SUPERBLOCKS MODE_INFO *sb_mi_ptr = cm->mi, *sb_mi; @@ -1451,17 +1452,45 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { #if CONFIG_SUPERBLOCKS sb_mbmi = &sb_mi->mbmi; #endif - if ( + if (mbmi->txfm_size > txfm_max) { +#if CONFIG_SUPERBLOCKS + if (sb_mbmi->encoded_as_sb) { + if (!((mb_col & 1) || (mb_row & 1))) { + segment_id = mbmi->segment_id; + skip = mbmi->mb_skip_coeff; + if (mb_col < cm->mb_cols - 1) { + segment_id = segment_id && mi[1].mbmi.segment_id; + skip = skip && mi[1].mbmi.mb_skip_coeff; + } + if (mb_row < cm->mb_rows - 1) { + segment_id = segment_id && + mi[cm->mode_info_stride].mbmi.segment_id; + skip = skip && mi[cm->mode_info_stride].mbmi.mb_skip_coeff; + if (mb_col < cm->mb_cols - 1) { + segment_id = segment_id && + mi[cm->mode_info_stride + 1].mbmi.segment_id; + skip = skip && mi[cm->mode_info_stride + 1].mbmi.mb_skip_coeff; + } + } + xd->mode_info_context = mi; + assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || + (cm->mb_no_coeff_skip && skip)); + mbmi->txfm_size = txfm_max; + } else { + mbmi->txfm_size = sb_mbmi->txfm_size; + } + } else { +#endif + segment_id = mbmi->segment_id; + xd->mode_info_context = mi; + assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || + (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff)); + mbmi->txfm_size = txfm_max; #if CONFIG_SUPERBLOCKS - !sb_mbmi->encoded_as_sb && -#endif - mbmi->txfm_size > txfm_max) { - segment_id = mbmi->segment_id; - xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || - (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff)); - mbmi->txfm_size = txfm_max; + } +#endif } #if CONFIG_SUPERBLOCKS if (mb_col & 1) @@ -1835,7 +1864,7 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi, } static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int mb_col) { + TOKENEXTRA **t, int mb_col, int mb_row) { const int output_enabled = 1; int n; MACROBLOCKD *xd = &x->e_mbd; @@ -1851,7 +1880,7 @@ static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd); TOKENEXTRA *tp[4]; int skip[4]; - MODE_INFO *mi = x->e_mbd.mode_info_context; + MODE_INFO *mi = xd->mode_info_context; ENTROPY_CONTEXT_PLANES ta[4], tl[4]; if ((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled) { @@ -1862,7 +1891,6 @@ static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, vp9_build_intra_predictors_sby_s(&x->e_mbd); vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8); for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; @@ -1881,15 +1909,9 @@ static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, dst_uv_stride); - vp9_transform_mb_8x8(x); - vp9_quantize_mb_8x8(x); - if (x->optimize) { - vp9_optimize_mby_8x8(x, rtcd); - vp9_optimize_mbuv_8x8(x, rtcd); - } - vp9_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - vp9_recon_mby_s_c(&x->e_mbd, dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, + vp9_fidct_mb(x, rtcd); + vp9_recon_mby_s_c(xd, dst + x_idx * 16 + y_idx * 16 * dst_y_stride); + vp9_recon_mbuv_s_c(xd, udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); @@ -1898,16 +1920,35 @@ static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, memcpy(&tl[n], xd->left_context, sizeof(tl[n])); tp[n] = *t; xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride; - vp9_tokenize_mb(cpi, &x->e_mbd, t, 0); + vp9_tokenize_mb(cpi, xd, t, 0); skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; } } if (output_enabled) { + int segment_id; + // Tokenize xd->mode_info_context = mi; + segment_id = mi->mbmi.segment_id; sum_intra_stats(cpi, x); update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); + if (cm->txfm_mode == TX_MODE_SELECT && + !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) || + (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { + cpi->txfm_count[mi->mbmi.txfm_size]++; + } else { + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_16X16 : cm->txfm_mode; + mi->mbmi.txfm_size = sz; + if (mb_col < cm->mb_cols - 1) + mi[1].mbmi.txfm_size = sz; + if (mb_row < cm->mb_rows - 1) { + mi[cm->mode_info_stride].mbmi.txfm_size = sz; + if (mb_col < cm->mb_cols - 1) + mi[cm->mode_info_stride + 1].mbmi.txfm_size = sz; + } + } } } #endif /* CONFIG_SUPERBLOCKS */ @@ -1962,7 +2003,8 @@ static void encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x, } static void encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int output_enabled) { + int recon_uvoffset, int output_enabled, + int mb_col, int mb_row) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; @@ -2151,8 +2193,8 @@ static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, int recon_uvoffset, int mb_col, int mb_row) { const int output_enabled = 1; - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; const uint8_t *src = x->src.y_buffer; uint8_t *dst = xd->dst.y_buffer; const uint8_t *usrc = x->src.u_buffer; @@ -2162,13 +2204,13 @@ static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; const VP9_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd); - unsigned int segment_id = xd->mode_info_context->mbmi.segment_id; int seg_ref_active; unsigned char ref_pred_flag; int n; TOKENEXTRA *tp[4]; int skip[4]; MODE_INFO *mi = x->e_mbd.mode_info_context; + unsigned int segment_id = mi->mbmi.segment_id; ENTROPY_CONTEXT_PLANES ta[4], tl[4]; x->skip = 0; @@ -2248,7 +2290,6 @@ static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.y_stride, xd->dst.uv_stride); } - assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8); for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; @@ -2264,13 +2305,7 @@ static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, dst_uv_stride); - vp9_transform_mb_8x8(x); - vp9_quantize_mb_8x8(x); - if (x->optimize) { - vp9_optimize_mby_8x8(x, rtcd); - vp9_optimize_mbuv_8x8(x, rtcd); - } - vp9_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + vp9_fidct_mb(x, rtcd); vp9_recon_mby_s_c(&x->e_mbd, dst + x_idx * 16 + y_idx * 16 * dst_y_stride); vp9_recon_mbuv_s_c(&x->e_mbd, @@ -2313,5 +2348,21 @@ static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, xd->mode_info_context = mi; update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); + if (cm->txfm_mode == TX_MODE_SELECT && + !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) || + (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { + cpi->txfm_count[mi->mbmi.txfm_size]++; + } else { + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_16X16 : cm->txfm_mode; + mi->mbmi.txfm_size = sz; + if (mb_col < cm->mb_cols - 1) + mi[1].mbmi.txfm_size = sz; + if (mb_row < cm->mb_rows - 1) { + mi[cm->mode_info_stride].mbmi.txfm_size = sz; + if (mb_col < cm->mb_cols - 1) + mi[cm->mode_info_stride + 1].mbmi.txfm_size = sz; + } + } } #endif diff --git a/vp9/encoder/encodemb.c b/vp9/encoder/encodemb.c index d828c51c640357da0d4a61d0c83ba855287afe82..71e81edd0031f5be4e8c7ad8803a0a6c177425e1 100644 --- a/vp9/encoder/encodemb.c +++ b/vp9/encoder/encodemb.c @@ -884,13 +884,10 @@ static void optimize_mb_16x16(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { vp9_optimize_mbuv_8x8(x, rtcd); } -void vp9_encode_inter16x16(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; +void vp9_fidct_mb(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { + MACROBLOCKD *const xd = &x->e_mbd; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - vp9_build_inter_predictors_mb(xd); - subtract_mb(rtcd, x); - if (tx_size == TX_16X16) { vp9_transform_mb_16x16(x); vp9_quantize_mb_16x16(x); @@ -924,7 +921,14 @@ void vp9_encode_inter16x16(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { optimize_mb_4x4(x, rtcd); vp9_inverse_transform_mb_4x4(IF_RTCD(&rtcd->common->idct), xd); } +} + +void vp9_encode_inter16x16(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + vp9_build_inter_predictors_mb(xd); + subtract_mb(rtcd, x); + vp9_fidct_mb(x, rtcd); vp9_recon_mb(xd); } diff --git a/vp9/encoder/encodemb.h b/vp9/encoder/encodemb.h index 8a3d38f1de31ce17b8ee2897a8df30291118156a..e59ed8a2748e55bcfe70c3946d25631da4fac169 100644 --- a/vp9/encoder/encodemb.h +++ b/vp9/encoder/encodemb.h @@ -55,6 +55,8 @@ void vp9_transform_mb_16x16(MACROBLOCK *mb); void vp9_transform_mby_16x16(MACROBLOCK *x); void vp9_optimize_mby_16x16(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd); +void vp9_fidct_mb(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd); + void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); #if CONFIG_SUPERBLOCKS diff --git a/vp9/encoder/rdopt.c b/vp9/encoder/rdopt.c index 19b96af4023ba3acea05562f07b8c495ab9bf1e9..ef92b62580c2c56087011ea6b6c9eed1f860d123 100644 --- a/vp9/encoder/rdopt.c +++ b/vp9/encoder/rdopt.c @@ -610,7 +610,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, return cost; } -static int rdcost_mby_4x4(MACROBLOCK *mb) { +static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { int cost = 0; int b; MACROBLOCKD *xd = &mb->e_mbd; @@ -618,11 +618,16 @@ static int rdcost_mby_4x4(MACROBLOCK *mb) { ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (backup) { + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + } else { + ta = (ENTROPY_CONTEXT *)xd->above_context; + tl = (ENTROPY_CONTEXT *)xd->left_context; + } for (b = 0; b < 16; b++) cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_NO_DC, @@ -640,7 +645,7 @@ static void macro_block_yrd_4x4(MACROBLOCK *mb, int *Rate, int *Distortion, const VP9_ENCODER_RTCD *rtcd, - int *skippable) { + int *skippable, int backup) { int b; MACROBLOCKD *const xd = &mb->e_mbd; BLOCK *const mb_y2 = mb->block + 24; @@ -674,7 +679,7 @@ static void macro_block_yrd_4x4(MACROBLOCK *mb, *Distortion = (d >> 2); // rate - *Rate = rdcost_mby_4x4(mb); + *Rate = rdcost_mby_4x4(mb, backup); *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, 1); } @@ -711,7 +716,7 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, int *Rate, int *Distortion, const VP9_ENCODER_RTCD *rtcd, - int *skippable) { + int *skippable, int backup) { MACROBLOCKD *const xd = &mb->e_mbd; BLOCK *const mb_y2 = mb->block + 24; BLOCKD *const x_y2 = xd->block + 24; @@ -735,28 +740,34 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, *Distortion = (d >> 2); // rate - *Rate = rdcost_mby_8x8(mb, 1); + *Rate = rdcost_mby_8x8(mb, backup); *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, 1); } -static int rdcost_mby_16x16(MACROBLOCK *mb) { +static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) { int cost; MACROBLOCKD *xd = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (backup) { + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + } else { + ta = (ENTROPY_CONTEXT *)xd->above_context; + tl = (ENTROPY_CONTEXT *)xd->left_context; + } cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); return cost; } static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, - const VP9_ENCODER_RTCD *rtcd, int *skippable) { + const VP9_ENCODER_RTCD *rtcd, int *skippable, + int backup) { int d; MACROBLOCKD *xd = &mb->e_mbd; BLOCKD *b = &mb->e_mbd.block[0]; @@ -780,125 +791,97 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, *Distortion = (d >> 2); // rate - *Rate = rdcost_mby_16x16(mb); + *Rate = rdcost_mby_16x16(mb, backup); *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd); } -static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skippable, - int64_t txfm_cache[NB_TXFM_MODES]) { - VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; - - MACROBLOCKD *xd = &x->e_mbd; - int can_skip = cm->mb_no_coeff_skip; - vp9_prob skip_prob = can_skip ? vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128; - int s0, s1; - int r4x4, r4x4s, r8x8, r8x8s, d4x4, d8x8, s4x4, s8x8; - int64_t rd4x4, rd8x8, rd4x4s, rd8x8s; - int d16x16, r16x16, r16x16s, s16x16; - int64_t rd16x16, rd16x16s; - - vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor, - x->block[0].src_stride); - - if (skip_prob == 0) - skip_prob = 1; - s0 = vp9_cost_bit(skip_prob, 0); - s1 = vp9_cost_bit(skip_prob, 1); - macro_block_yrd_16x16(x, &r16x16, &d16x16, IF_RTCD(&cpi->rtcd), &s16x16); - if (can_skip) { - if (s16x16) { - rd16x16 = RDCOST(x->rdmult, x->rddiv, s1, d16x16); - } else { - rd16x16 = RDCOST(x->rdmult, x->rddiv, r16x16 + s0, d16x16); - } - } else { - rd16x16 = RDCOST(x->rdmult, x->rddiv, r16x16, d16x16); - } - r16x16s = r16x16 + vp9_cost_one(cm->prob_tx[0]) + vp9_cost_one(cm->prob_tx[1]); - if (can_skip) { - if (s16x16) { - rd16x16s = RDCOST(x->rdmult, x->rddiv, s1, d16x16); - } else { - rd16x16s = RDCOST(x->rdmult, x->rddiv, r16x16s + s0, d16x16); - } - } else { - rd16x16s = RDCOST(x->rdmult, x->rddiv, r16x16s, d16x16); - } - macro_block_yrd_8x8(x, &r8x8, &d8x8, IF_RTCD(&cpi->rtcd), &s8x8); - if (can_skip) { - if (s8x8) { - rd8x8 = RDCOST(x->rdmult, x->rddiv, s1, d8x8); - } else { - rd8x8 = RDCOST(x->rdmult, x->rddiv, r8x8 + s0, d8x8); - } - } else { - rd8x8 = RDCOST(x->rdmult, x->rddiv, r8x8, d8x8); - } - r8x8s = r8x8 + vp9_cost_one(cm->prob_tx[0]); - r8x8s += vp9_cost_zero(cm->prob_tx[1]); - if (can_skip) { - if (s8x8) { - rd8x8s = RDCOST(x->rdmult, x->rddiv, s1, d8x8); - } else { - rd8x8s = RDCOST(x->rdmult, x->rddiv, r8x8s + s0, d8x8); - } - } else { - rd8x8s = RDCOST(x->rdmult, x->rddiv, r8x8s, d8x8); - } - macro_block_yrd_4x4(x, &r4x4, &d4x4, IF_RTCD(&cpi->rtcd), &s4x4); - if (can_skip) { - if (s4x4) { - rd4x4 = RDCOST(x->rdmult, x->rddiv, s1, d4x4); - } else { - rd4x4 = RDCOST(x->rdmult, x->rddiv, r4x4 + s0, d4x4); +static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, + int r[2][TX_SIZE_MAX], int *rate, + int d[TX_SIZE_MAX], int *distortion, + int s[TX_SIZE_MAX], int *skip, + int64_t txfm_cache[NB_TXFM_MODES]) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + vp9_prob skip_prob = cm->mb_no_coeff_skip ? + vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128; + int64_t rd[2][TX_SIZE_MAX]; + int n; + + r[1][TX_16X16] = r[0][TX_16X16] + vp9_cost_one(cm->prob_tx[0]) + + vp9_cost_one(cm->prob_tx[1]); + r[1][TX_8X8] = r[0][TX_8X8] + vp9_cost_one(cm->prob_tx[0]) + + vp9_cost_zero(cm->prob_tx[1]); + r[1][TX_4X4] = r[0][TX_4X4] + vp9_cost_zero(cm->prob_tx[0]); + + if (cm->mb_no_coeff_skip) { + int s0, s1; + + assert(skip_prob > 0); + s0 = vp9_cost_bit(skip_prob, 0); + s1 = vp9_cost_bit(skip_prob, 1); + + for (n = TX_4X4; n <= TX_16X16; n++) { + if (s[n]) { + rd[0][n] = rd[1][n] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); + } else { + rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n] + s0, d[n]); + rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n] + s0, d[n]); + } } } else { - rd4x4 = RDCOST(x->rdmult, x->rddiv, r4x4, d4x4); - } - r4x4s = r4x4 + vp9_cost_zero(cm->prob_tx[0]); - if (can_skip) { - if (s4x4) { - rd4x4s = RDCOST(x->rdmult, x->rddiv, s1, d4x4); - } else { - rd4x4s = RDCOST(x->rdmult, x->rddiv, r4x4s + s0, d4x4); + for (n = TX_4X4; n <= TX_16X16; n++) { + rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n], d[n]); + rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n], d[n]); } - } else { - rd4x4s = RDCOST(x->rdmult, x->rddiv, r4x4s, d4x4); } - if ( cpi->common.txfm_mode == ALLOW_16X16 || - (cpi->common.txfm_mode == TX_MODE_SELECT && - rd16x16s < rd8x8s && rd16x16s < rd4x4s)) { + if ( cm->txfm_mode == ALLOW_16X16 || + (cm->txfm_mode == TX_MODE_SELECT && + rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4])) { mbmi->txfm_size = TX_16X16; - *skippable = s16x16; - *distortion = d16x16; - *rate = (cpi->common.txfm_mode == ALLOW_16X16) ? r16x16 : r16x16s; - } else - if ( cpi->common.txfm_mode == ALLOW_8X8 || - (cpi->common.txfm_mode == TX_MODE_SELECT && rd8x8s < rd4x4s)) { + } else if (cm->txfm_mode == ALLOW_8X8 || + (cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_8X8] < rd[1][TX_4X4])) { mbmi->txfm_size = TX_8X8; - *skippable = s8x8; - *distortion = d8x8; - *rate = (cpi->common.txfm_mode == ALLOW_8X8) ? r8x8 : r8x8s; } else { - assert(cpi->common.txfm_mode == ONLY_4X4 || - (cpi->common.txfm_mode == TX_MODE_SELECT && rd4x4s <= rd8x8s)); + assert(cm->txfm_mode == ONLY_4X4 || + (cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_4X4] <= rd[1][TX_8X8])); mbmi->txfm_size = TX_4X4; - *skippable = s4x4; - *distortion = d4x4; - *rate = (cpi->common.txfm_mode == ONLY_4X4) ? r4x4 : r4x4s; } - txfm_cache[ONLY_4X4] = rd4x4; - txfm_cache[ALLOW_8X8] = rd8x8; - txfm_cache[ALLOW_16X16] = rd16x16; - if (rd16x16s < rd8x8s && rd16x16s < rd4x4s) - txfm_cache[TX_MODE_SELECT] = rd16x16s; + *distortion = d[mbmi->txfm_size]; + *rate = r[cm->txfm_mode == TX_MODE_SELECT][mbmi->txfm_size]; + *skip = s[mbmi->txfm_size]; + + txfm_cache[ONLY_4X4] = rd[0][TX_4X4]; + txfm_cache[ALLOW_8X8] = rd[0][TX_8X8]; + txfm_cache[ALLOW_16X16] = rd[0][TX_16X16]; + if (rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4]) + txfm_cache[TX_MODE_SELECT] = rd[1][TX_16X16]; else - txfm_cache[TX_MODE_SELECT] = rd4x4s < rd8x8s ? rd4x4s : rd8x8s; + txfm_cache[TX_MODE_SELECT] = rd[1][TX_4X4] < rd[1][TX_8X8] ? + rd[1][TX_4X4] : rd[1][TX_8X8]; +} + +static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int *distortion, int *skippable, + int64_t txfm_cache[NB_TXFM_MODES]) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX]; + + vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor, + x->block[0].src_stride); + macro_block_yrd_16x16(x, &r[0][TX_16X16], &d[TX_16X16], + IF_RTCD(&cpi->rtcd), &s[TX_16X16], 1); + macro_block_yrd_8x8(x, &r[0][TX_8X8], &d[TX_8X8], + IF_RTCD(&cpi->rtcd), &s[TX_8X8], 1); + macro_block_yrd_4x4(x, &r[0][TX_4X4], &d[TX_4X4], + IF_RTCD(&cpi->rtcd), &s[TX_4X4], 1); + + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable, + txfm_cache); } static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { @@ -911,62 +894,61 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { } #if CONFIG_SUPERBLOCKS -static void super_block_yrd_8x8(MACROBLOCK *x, - int *rate, - int *distortion, - const VP9_ENCODER_RTCD *rtcd, int *skip) -{ +static void super_block_yrd(VP9_COMP *cpi, + MACROBLOCK *x, int *rate, int *distortion, + const VP9_ENCODER_RTCD *rtcd, int *skip, + int64_t txfm_cache[NB_TXFM_MODES]) { MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *const by2 = x->block + 24; - BLOCKD *const bdy2 = xd->block + 24; - int d = 0, r = 0, n; + int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX], n; const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - ENTROPY_CONTEXT_PLANES *ta = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl = xd->left_context; - ENTROPY_CONTEXT_PLANES t_above[2]; - ENTROPY_CONTEXT_PLANES t_left[2]; - int skippable = 1; - - vpx_memcpy(t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(t_left, xd->left_context, sizeof(t_left)); + ENTROPY_CONTEXT_PLANES t_above[3][2], *orig_above = xd->above_context; + ENTROPY_CONTEXT_PLANES t_left[3][2], *orig_left = xd->left_context; + + for (n = TX_4X4; n <= TX_16X16; n++) { + vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n])); + vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n])); + r[0][n] = 0; + d[n] = 0; + s[n] = 1; + } for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; + int r_tmp, d_tmp, s_tmp; vp9_subtract_mby_s_c(x->src_diff, src + x_idx * 16 + y_idx * 16 * src_y_stride, src_y_stride, dst + x_idx * 16 + y_idx * 16 * dst_y_stride, dst_y_stride); - vp9_transform_mby_8x8(x); - vp9_quantize_mby_8x8(x); - - /* remove 1st order dc to properly combine 1st/2nd order distortion */ - x->coeff[ 0] = 0; - x->coeff[ 64] = 0; - x->coeff[128] = 0; - x->coeff[192] = 0; - xd->dqcoeff[ 0] = 0; - xd->dqcoeff[ 64] = 0; - xd->dqcoeff[128] = 0; - xd->dqcoeff[192] = 0; - - d += vp9_mbblock_error(x, 0); - d += vp9_block_error(by2->coeff, bdy2->dqcoeff, 16); - xd->above_context = ta + x_idx; - xd->left_context = tl + y_idx; - r += rdcost_mby_8x8(x, 0); - skippable = skippable && vp9_mby_is_skippable_8x8(xd, 1); + + xd->above_context = &t_above[TX_16X16][x_idx]; + xd->left_context = &t_left[TX_16X16][y_idx]; + macro_block_yrd_16x16(x, &r_tmp, &d_tmp, IF_RTCD(&cpi->rtcd), &s_tmp, 0); + d[TX_16X16] += d_tmp; + r[0][TX_16X16] += r_tmp; + s[TX_16X16] = s[TX_16X16] && s_tmp; + + xd->above_context = &t_above[TX_4X4][x_idx]; + xd->left_context = &t_left[TX_4X4][y_idx]; + macro_block_yrd_4x4(x, &r_tmp, &d_tmp, IF_RTCD(&cpi->rtcd), &s_tmp, 0); + d[TX_4X4] += d_tmp; + r[0][TX_4X4] += r_tmp; + s[TX_4X4] = s[TX_4X4] && s_tmp; + + xd->above_context = &t_above[TX_8X8][x_idx]; + xd->left_context = &t_left[TX_8X8][y_idx]; + macro_block_yrd_8x8(x, &r_tmp, &d_tmp, IF_RTCD(&cpi->rtcd), &s_tmp, 0); + d[TX_8X8] += d_tmp; + r[0][TX_8X8] += r_tmp; + s[TX_8X8] = s[TX_8X8] && s_tmp; } - *distortion = (d >> 2); - *rate = r; - if (skip) *skip = skippable; - xd->above_context = ta; - xd->left_context = tl; - vpx_memcpy(xd->above_context, &t_above, sizeof(t_above)); - vpx_memcpy(xd->left_context, &t_left, sizeof(t_left)); + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache); + + xd->above_context = orig_above; + xd->left_context = orig_left; } #endif @@ -1190,7 +1172,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, int *rate, int *rate_tokenonly, int *distortion, - int *skippable) { + int *skippable, + int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); int this_rate, this_rate_tokenonly; @@ -1202,8 +1185,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, x->e_mbd.mode_info_context->mbmi.mode = mode; vp9_build_intra_predictors_sby_s(&x->e_mbd); - super_block_yrd_8x8(x, &this_rate_tokenonly, - &this_distortion, IF_RTCD(&cpi->rtcd), &s); + super_block_yrd(cpi, x, &this_rate_tokenonly, + &this_distortion, IF_RTCD(&cpi->rtcd), &s, txfm_cache); this_rate = this_rate_tokenonly + x->mbmode_cost[x->e_mbd.frame_type] [x->e_mbd.mode_info_context->mbmi.mode]; @@ -1239,12 +1222,12 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, MB_PREDICTION_MODE mode2; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected); #endif - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int rate, ratey; int distortion, skip; int64_t best_rd = INT64_MAX; int64_t this_rd; - MACROBLOCKD *xd = &x->e_mbd; int i; for (i = 0; i < NB_TXFM_MODES; i++) @@ -1261,11 +1244,11 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, mbmi->second_mode = mode2; if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) { #endif - vp9_build_intra_predictors_mby(&x->e_mbd); + vp9_build_intra_predictors_mby(xd); #if CONFIG_COMP_INTRA_PRED } else { continue; // i.e. disable for now - vp9_build_comp_intra_predictors_mby(&x->e_mbd); + vp9_build_comp_intra_predictors_mby(xd); } #endif @@ -1273,7 +1256,7 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, // FIXME add compoundmode cost // FIXME add rate for mode2 - rate = ratey + x->mbmode_cost[x->e_mbd.frame_type][mbmi->mode]; + rate = ratey + x->mbmode_cost[xd->frame_type][mbmi->mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); @@ -1519,18 +1502,23 @@ static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } -static int rd_cost_mbuv(MACROBLOCK *mb) { +static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) { int b; int cost = 0; MACROBLOCKD *xd = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (backup) { + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + } else { + ta = (ENTROPY_CONTEXT *)xd->above_context; + tl = (ENTROPY_CONTEXT *)xd->left_context; + } for (b = 16; b < 24; b++) cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, @@ -1541,15 +1529,13 @@ static int rd_cost_mbuv(MACROBLOCK *mb) { } -static int64_t rd_inter16x16_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip) { - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - +static int64_t rd_inter16x16_uv_4x4(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int *distortion, int fullpixel, int *skip, + int do_ctx_backup) { vp9_transform_mbuv_4x4(x); vp9_quantize_mbuv_4x4(x); - *rate = rd_cost_mbuv(x); + *rate = rd_cost_mbuv_4x4(x, do_ctx_backup); *distortion = vp9_mbuverror(x) / 4; *skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd); @@ -1582,10 +1568,24 @@ static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) { return cost; } +static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int *distortion, int fullpixel, int *skip, + int do_ctx_backup) { + vp9_transform_mbuv_8x8(x); + vp9_quantize_mbuv_8x8(x); + + *rate = rd_cost_mbuv_8x8(x, do_ctx_backup); + *distortion = vp9_mbuverror(x) / 4; + *skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd); + + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); +} + #if CONFIG_SUPERBLOCKS -static int64_t rd_inter32x32_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, +static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel, int *skip) { MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int n, r = 0, d = 0; const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; @@ -1600,7 +1600,10 @@ static int64_t rd_inter32x32_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; + int d_tmp, s_tmp, r_tmp; + xd->above_context = ta + x_idx; + xd->left_context = tl + y_idx; vp9_subtract_mbuv_s_c(x->src_diff, usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, @@ -1609,58 +1612,35 @@ static int64_t rd_inter32x32_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, dst_uv_stride); - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); + if (mbmi->txfm_size == TX_4X4) { + rd_inter16x16_uv_4x4(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0); + } else { + rd_inter16x16_uv_8x8(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0); + } - xd->above_context = ta + x_idx; - xd->left_context = tl + y_idx; - r += rd_cost_mbuv_8x8(x, 0); - d += vp9_mbuverror(x) / 4; - skippable = skippable && vp9_mbuv_is_skippable_8x8(xd); + r += r_tmp; + d += d_tmp; + skippable = skippable && s_tmp; } *rate = r; *distortion = d; - if (skip) *skip = skippable; + *skip = skippable; xd->left_context = tl; xd->above_context = ta; memcpy(xd->above_context, t_above, sizeof(t_above)); memcpy(xd->left_context, t_left, sizeof(t_left)); - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); + return RDCOST(x->rdmult, x->rddiv, r, d); } #endif -static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip) { - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - - *rate = rd_cost_mbuv_8x8(x, 1); - *distortion = vp9_mbuverror(x) / 4; - *skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd); - - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} - - static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skippable, int fullpixel) { + int *distortion, int *skip, int fullpixel) { vp9_build_inter4x4_predictors_mbuv(&x->e_mbd); vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - - *rate = rd_cost_mbuv(x); - *distortion = vp9_mbuverror(x) / 4; - *skippable = vp9_mbuv_is_skippable_4x4(&x->e_mbd); - - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); + return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1); } static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, @@ -1707,7 +1687,7 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, vp9_transform_mbuv_4x4(x); vp9_quantize_mbuv_4x4(x); - rate_to = rd_cost_mbuv(x); + rate_to = rd_cost_mbuv_4x4(x, 1); rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; @@ -2434,8 +2414,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // store everything needed to come back to this!! for (i = 0; i < 16; i++) { - BLOCKD *bd = &x->e_mbd.block[i]; - bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; if (mbmi->second_ref_frame) bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv; @@ -3114,12 +3092,9 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, PARTITION_INFO *partition, int_mv *ref_mv, int_mv *second_ref_mv, - int single_pred_diff, - int comp_pred_diff, - int hybrid_pred_diff, + int64_t comp_pred_diff[NB_PREDICTION_TYPES], int64_t txfm_size_diff[NB_TXFM_MODES]) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + MACROBLOCKD *const xd = &x->e_mbd; // Take a snapshot of the coding context so it can be // restored if we decide to encode this way @@ -3135,15 +3110,11 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // ctx[mb_index].rddiv = x->rddiv; // ctx[mb_index].rdmult = x->rdmult; - ctx->single_pred_diff = single_pred_diff; - ctx->comp_pred_diff = comp_pred_diff; - ctx->hybrid_pred_diff = hybrid_pred_diff; + ctx->single_pred_diff = comp_pred_diff[SINGLE_PREDICTION_ONLY]; + ctx->comp_pred_diff = comp_pred_diff[COMP_PREDICTION_ONLY]; + ctx->hybrid_pred_diff = comp_pred_diff[HYBRID_PREDICTION]; - if (txfm_size_diff) { - memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); - } else { - memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); - } + memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); } static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x, int this_mode, @@ -3159,12 +3130,15 @@ static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x, int this_mode, *distortion2 += *distortion; // UV cost and distortion + vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, + x->e_mbd.predictor, x->src.uv_stride); if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4) rd_inter16x16_uv_8x8(cpi, x, rate_uv, distortion_uv, - cpi->common.full_pixel, &uv_skippable); + cpi->common.full_pixel, &uv_skippable, 1); else - rd_inter16x16_uv(cpi, x, rate_uv, distortion_uv, cpi->common.full_pixel, - &uv_skippable); + rd_inter16x16_uv_4x4(cpi, x, rate_uv, distortion_uv, + cpi->common.full_pixel, &uv_skippable, 1); + *rate2 += *rate_uv; *distortion2 += *distortion_uv; *skippable = y_skippable && uv_skippable; @@ -3183,8 +3157,8 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, unsigned char *u_buffer[4], unsigned char *v_buffer[4]) { YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx]; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_find_near_mvs(xd, xd->mode_info_context, xd->prev_mode_info_context, @@ -3435,14 +3409,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_SUPERBLOCKS int skippable_y, skippable_uv; - // Y cost and distortion - FIXME support other transform sizes - super_block_yrd_8x8(x, rate_y, distortion_y, - IF_RTCD(&cpi->rtcd), &skippable_y); + // Y cost and distortion + super_block_yrd(cpi, x, rate_y, distortion_y, + IF_RTCD(&cpi->rtcd), &skippable_y, txfm_cache); *rate2 += *rate_y; *distortion += *distortion_y; - rd_inter32x32_uv_8x8(cpi, x, rate_uv, distortion_uv, - cm->full_pixel, &skippable_uv); + rd_inter32x32_uv(cpi, x, rate_uv, distortion_uv, + cm->full_pixel, &skippable_uv); *rate2 += *rate_uv; *distortion += *distortion_uv; @@ -4053,8 +4027,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best compound/single-only prediction */ - if (!disable_skip && - mbmi->ref_frame != INTRA_FRAME) { + if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) { int64_t single_rd, hybrid_rd; int single_rate, hybrid_rate; @@ -4202,12 +4175,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } end: - store_coding_context(x, &x->mb_context[xd->mb_index], - best_mode_index, &best_partition, - &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame], - &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame], - (int)best_pred_diff[0], (int)best_pred_diff[1], (int)best_pred_diff[2], - best_txfm_diff); + store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, + &best_partition, &frame_best_ref_mv[mbmi->ref_frame], + &frame_best_ref_mv[mbmi->second_ref_frame], + best_pred_diff, best_txfm_diff); } #if CONFIG_SUPERBLOCKS @@ -4221,13 +4192,14 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int error_y, error_uv; int dist_y, dist_uv; int y_skip, uv_skip; + int64_t txfm_cache[NB_TXFM_MODES]; xd->mode_info_context->mbmi.txfm_size = TX_8X8; + error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y, &y_skip, txfm_cache); error_uv = rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip); - error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip); if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + @@ -4408,7 +4380,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame; unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; - int comp_pred; + int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; int_mv frame_best_ref_mv[4]; int frame_mdcounts[4][4]; @@ -4423,10 +4395,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; int saddone = 0; int64_t best_rd = INT64_MAX; - int64_t best_comp_rd = INT64_MAX; - int64_t best_single_rd = INT64_MAX; - int64_t best_hybrid_rd = INT64_MAX; int64_t best_yrd = INT64_MAX; + int64_t best_txfm_rd[NB_TXFM_MODES]; + int64_t best_txfm_diff[NB_TXFM_MODES]; + int64_t best_pred_diff[NB_PREDICTION_TYPES]; + int64_t best_pred_rd[NB_PREDICTION_TYPES]; MB_MODE_INFO best_mbmode; int mode_index, best_mode_index; unsigned int ref_costs[MAX_REF_FRAMES]; @@ -4436,6 +4409,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, estimate_ref_frame_costs(cpi, segment_id, ref_costs); vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); + for (i = 0; i < NB_PREDICTION_TYPES; ++i) + best_pred_rd[i] = INT64_MAX; + for (i = 0; i < NB_TXFM_MODES; i++) + best_txfm_rd[i] = INT64_MAX; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, @@ -4606,14 +4584,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } #endif - if (!disable_skip && xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - if (this_rd < best_comp_rd) - best_comp_rd = this_rd; - if (this_rd < best_single_rd) - best_single_rd = this_rd; - if (this_rd < best_hybrid_rd) - best_hybrid_rd = this_rd; - } + if (!disable_skip && mbmi->ref_frame == INTRA_FRAME) + for (i = 0; i < NB_PREDICTION_TYPES; ++i) + best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); // Did this mode help.. i.e. is it the new best mode if (this_rd < best_rd || x->skip) { @@ -4673,14 +4646,28 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - if (mbmi->second_ref_frame == INTRA_FRAME && single_rd < best_single_rd) { - best_single_rd = single_rd; + if (mbmi->second_ref_frame == INTRA_FRAME && + single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { + best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; } else if (mbmi->second_ref_frame != INTRA_FRAME && - single_rd < best_comp_rd) { - best_comp_rd = single_rd; + single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { + best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; } - if (hybrid_rd < best_hybrid_rd) { - best_hybrid_rd = hybrid_rd; + if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) + best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; + } + + /* keep record of best txfm size */ + if (!mode_excluded && this_rd != INT64_MAX) { + for (i = 0; i < NB_TXFM_MODES; i++) { + int64_t adj_rd; + if (this_mode != B_PRED) { + adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode]; + } else { + adj_rd = this_rd; + } + if (adj_rd < best_txfm_rd[i]) + best_txfm_rd[i] = adj_rd; } } @@ -4719,31 +4706,40 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->uv_mode = DC_PRED; mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0; mbmi->partitioning = 0; - mbmi->txfm_size = TX_8X8; + mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ? + TX_16X16 : cm->txfm_mode; - if (best_rd != INT64_MAX) - store_coding_context(x, &x->sb_context[0], best_mode_index, NULL, - &frame_best_ref_mv[mbmi->ref_frame], - &frame_best_ref_mv[mbmi->second_ref_frame], - 0, 0, 0, NULL); - return best_rd; + vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); + vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); + goto end; } // macroblock modes vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); - mbmi->txfm_size = TX_8X8; - - if (best_rd != INT64_MAX) - store_coding_context(x, &x->sb_context[0], best_mode_index, NULL, - &frame_best_ref_mv[mbmi->ref_frame], - &frame_best_ref_mv[mbmi->second_ref_frame], - (best_single_rd == INT64_MAX) ? INT_MIN : - (best_rd - best_single_rd), - (best_comp_rd == INT64_MAX) ? INT_MIN : - (best_rd - best_comp_rd), - (best_hybrid_rd == INT64_MAX) ? INT_MIN : - (best_rd - best_hybrid_rd), - NULL); + + for (i = 0; i < NB_PREDICTION_TYPES; ++i) { + if (best_pred_rd[i] == INT64_MAX) + best_pred_diff[i] = INT_MIN; + else + best_pred_diff[i] = best_rd - best_pred_rd[i]; + } + + if (!x->skip) { + for (i = 0; i < NB_TXFM_MODES; i++) { + if (best_txfm_rd[i] == INT64_MAX) + best_txfm_diff[i] = INT_MIN; + else + best_txfm_diff[i] = best_rd - best_txfm_rd[i]; + } + } else { + vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); + } + + end: + store_coding_context(x, &x->sb_context[0], best_mode_index, NULL, + &frame_best_ref_mv[mbmi->ref_frame], + &frame_best_ref_mv[mbmi->second_ref_frame], + best_pred_diff, best_txfm_diff); return best_rd; }