diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 0bfaec7a0c5e8573318ddf01643e012ff810c2ce..b58945e51b129ee680e3b6dd1a998de3979c45fb 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -417,6 +417,7 @@ typedef struct macroblockd { static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) { switch (subsize) { + case BLOCK_SIZE_SB64X64: case BLOCK_SIZE_SB64X32: case BLOCK_SIZE_SB32X64: case BLOCK_SIZE_SB32X32: @@ -444,10 +445,10 @@ static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) { static INLINE void update_partition_context(MACROBLOCKD *xd, BLOCK_SIZE_TYPE sb_type, BLOCK_SIZE_TYPE sb_size) { - int bsl = mi_width_log2(sb_size), bs = 1 << bsl; - int bwl = mi_width_log2(sb_type); - int bhl = mi_height_log2(sb_type); - int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; + int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2; + int bwl = b_width_log2(sb_type); + int bhl = b_height_log2(sb_type); + int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl; int i; #if !CONFIG_AB4X4 diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 3e3a94e51da35652474eb4616e97175edb3d4bfb..a44fe4ca59d997d605eda339b78cc2b2937fce18 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -107,10 +107,10 @@ const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS] [PARTITION_TYPES - 1] = { // FIXME(jingning,rbultje) put real probabilities here #if CONFIG_AB4X4 - {202, 162, 107}, - {16, 2, 169}, - {3, 246, 19}, - {104, 90, 134}, + {105, 88, 252}, + {113, 88, 249}, + {113, 106, 251}, + {126, 105, 107}, #endif {202, 162, 107}, {16, 2, 169}, diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 3864d3c862e40d2c7991c41e8c885ec15c357fdc..d34bfa74d4e34d1b241541ccd68d0c8b1962ab0b 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -540,8 +540,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int_mv *const mv0 = &mbmi->mv[0]; int_mv *const mv1 = &mbmi->mv[1]; - const int bw = 1 << mi_width_log2(mi->mbmi.sb_type); - const int bh = 1 << mi_height_log2(mi->mbmi.sb_type); + BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); const int use_prev_in_find_mv_refs = cm->width == cm->last_width && cm->height == cm->last_height && @@ -549,6 +550,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, cm->last_show_frame; int mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge; + int j, idx, idy; mbmi->need_to_clamp_mvs = 0; mbmi->need_to_clamp_secondmv = 0; @@ -562,7 +564,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // Distance of Mb to the various image edges. // These specified to 8th pel as they are always compared to MV values // that are in 1/8th pel units - set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); + set_mi_row_col(cm, xd, mi_row, 1 << mi_height_log2(bsize), + mi_col, 1 << mi_width_log2(bsize)); mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN; mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; @@ -613,14 +616,14 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode = ZEROMV; } else { #if CONFIG_AB4X4 - if (mbmi->sb_type >= BLOCK_SIZE_SB8X8) + if (bsize >= BLOCK_SIZE_SB8X8) mbmi->mode = read_sb_mv_ref(r, mv_ref_p); else mbmi->mode = SPLITMV; #else - mbmi->mode = mbmi->sb_type > BLOCK_SIZE_SB8X8 ? - read_sb_mv_ref(r, mv_ref_p) - : read_mv_ref(r, mv_ref_p); + mbmi->mode = bsize > BLOCK_SIZE_SB8X8 ? + read_sb_mv_ref(r, mv_ref_p) + : read_mv_ref(r, mv_ref_p); #endif vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref_frame]); } @@ -685,80 +688,87 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->uv_mode = DC_PRED; switch (mbmi->mode) { - case SPLITMV: { - const int num_p = 4; - int j = 0; - - mbmi->need_to_clamp_mvs = 0; - do { // for each subset j - int_mv leftmv, abovemv, second_leftmv, second_abovemv; - int_mv blockmv, secondmv; - int mv_contz; - int blockmode; - int k = j; - - leftmv.as_int = left_block_mv(xd, mi, k); - abovemv.as_int = above_block_mv(mi, k, mis); - second_leftmv.as_int = 0; - second_abovemv.as_int = 0; - if (mbmi->second_ref_frame > 0) { - second_leftmv.as_int = left_block_second_mv(xd, mi, k); - second_abovemv.as_int = above_block_second_mv(mi, k, mis); - } - mv_contz = vp9_mv_cont(&leftmv, &abovemv); - blockmode = read_sub_mv_ref(r, cm->fc.sub_mv_ref_prob[mv_contz]); - cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++; - - switch (blockmode) { - case NEW4X4: - decode_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, - &cm->fc.NMVcount, xd->allow_high_precision_mv); - - if (mbmi->second_ref_frame > 0) - decode_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, - &cm->fc.NMVcount, xd->allow_high_precision_mv); - -#ifdef VPX_MODE_COUNT - vp9_mv_cont_count[mv_contz][3]++; -#endif - break; - case LEFT4X4: - blockmv.as_int = leftmv.as_int; - if (mbmi->second_ref_frame > 0) - secondmv.as_int = second_leftmv.as_int; -#ifdef VPX_MODE_COUNT - vp9_mv_cont_count[mv_contz][0]++; -#endif - break; - case ABOVE4X4: - blockmv.as_int = abovemv.as_int; - if (mbmi->second_ref_frame > 0) - secondmv.as_int = second_abovemv.as_int; -#ifdef VPX_MODE_COUNT - vp9_mv_cont_count[mv_contz][1]++; -#endif - break; - case ZERO4X4: - blockmv.as_int = 0; - if (mbmi->second_ref_frame > 0) - secondmv.as_int = 0; -#ifdef VPX_MODE_COUNT - vp9_mv_cont_count[mv_contz][2]++; + case SPLITMV: +#if !CONFIG_AB4X4 + bw = 1, bh = 1; #endif - break; - default: - break; + mbmi->need_to_clamp_mvs = 0; + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int_mv leftmv, abovemv, second_leftmv, second_abovemv; + int_mv blockmv, secondmv; + int mv_contz; + int blockmode; + int i, k; + j = idy * 2 + idx; + k = j; + + leftmv.as_int = left_block_mv(xd, mi, k); + abovemv.as_int = above_block_mv(mi, k, mis); + second_leftmv.as_int = 0; + second_abovemv.as_int = 0; + if (mbmi->second_ref_frame > 0) { + second_leftmv.as_int = left_block_second_mv(xd, mi, k); + second_abovemv.as_int = above_block_second_mv(mi, k, mis); + } + mv_contz = vp9_mv_cont(&leftmv, &abovemv); + blockmode = read_sub_mv_ref(r, cm->fc.sub_mv_ref_prob[mv_contz]); + cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++; + + switch (blockmode) { + case NEW4X4: + decode_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, + &cm->fc.NMVcount, xd->allow_high_precision_mv); + + if (mbmi->second_ref_frame > 0) + decode_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, + &cm->fc.NMVcount, xd->allow_high_precision_mv); + + #ifdef VPX_MODE_COUNT + vp9_mv_cont_count[mv_contz][3]++; + #endif + break; + case LEFT4X4: + blockmv.as_int = leftmv.as_int; + if (mbmi->second_ref_frame > 0) + secondmv.as_int = second_leftmv.as_int; + #ifdef VPX_MODE_COUNT + vp9_mv_cont_count[mv_contz][0]++; + #endif + break; + case ABOVE4X4: + blockmv.as_int = abovemv.as_int; + if (mbmi->second_ref_frame > 0) + secondmv.as_int = second_abovemv.as_int; + #ifdef VPX_MODE_COUNT + vp9_mv_cont_count[mv_contz][1]++; + #endif + break; + case ZERO4X4: + blockmv.as_int = 0; + if (mbmi->second_ref_frame > 0) + secondmv.as_int = 0; + #ifdef VPX_MODE_COUNT + vp9_mv_cont_count[mv_contz][2]++; + #endif + break; + default: + break; + } + mi->bmi[j].as_mv[0].as_int = blockmv.as_int; + if (mbmi->second_ref_frame > 0) + mi->bmi[j].as_mv[1].as_int = secondmv.as_int; + + for (i = 1; i < bh; ++i) + vpx_memcpy(&mi->bmi[j + i * 2], &mi->bmi[j], sizeof(mi->bmi[j])); + for (i = 1; i < bw; ++i) + vpx_memcpy(&mi->bmi[j + i], &mi->bmi[j], sizeof(mi->bmi[j])); } - mi->bmi[j].as_mv[0].as_int = blockmv.as_int; - if (mbmi->second_ref_frame > 0) - mi->bmi[j].as_mv[1].as_int = secondmv.as_int; - } while (++j < num_p); - } - - mv0->as_int = mi->bmi[3].as_mv[0].as_int; - mv1->as_int = mi->bmi[3].as_mv[1].as_int; + } - break; /* done with SPLITMV */ + mv0->as_int = mi->bmi[3].as_mv[0].as_int; + mv1->as_int = mi->bmi[3].as_mv[1].as_int; + break; /* done with SPLITMV */ case NEARMV: // Clip "next_nearest" so that it does not extend to far out of image @@ -822,14 +832,14 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mv0->as_int = 0; #if CONFIG_AB4X4 - if (mbmi->sb_type >= BLOCK_SIZE_SB8X8) { + if (bsize >= BLOCK_SIZE_SB8X8) { mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob); cm->fc.sb_ymode_counts[mbmi->mode]++; } else { mbmi->mode = I4X4_PRED; } #else - if (mbmi->sb_type > BLOCK_SIZE_SB8X8) { + if (bsize > BLOCK_SIZE_SB8X8) { mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob); cm->fc.sb_ymode_counts[mbmi->mode]++; } else { @@ -840,7 +850,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // If MB mode is I4X4_PRED read the block modes #if CONFIG_AB4X4 - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { + if (bsize < BLOCK_SIZE_SB8X8) { #else if (mbmi->mode == I4X4_PRED) { #endif @@ -857,21 +867,21 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } #if CONFIG_AB4X4 - if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && - mbmi->sb_type >= BLOCK_SIZE_SB8X8) { + if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && + bsize >= BLOCK_SIZE_SB8X8) { #else if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode != I4X4_PRED) || (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { #endif - const int allow_16x16 = mbmi->sb_type >= BLOCK_SIZE_MB16X16; - const int allow_32x32 = mbmi->sb_type >= BLOCK_SIZE_SB32X32; + const int allow_16x16 = bsize >= BLOCK_SIZE_MB16X16; + const int allow_32x32 = bsize >= BLOCK_SIZE_SB32X32; mbmi->txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32); - } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 && + } else if (bsize >= BLOCK_SIZE_SB32X32 && cm->txfm_mode >= ALLOW_32X32) { mbmi->txfm_size = TX_32X32; } else if (cm->txfm_mode >= ALLOW_16X16 && - mbmi->sb_type >= BLOCK_SIZE_MB16X16 + bsize >= BLOCK_SIZE_MB16X16 #if !CONFIG_AB4X4 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) || (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV)) @@ -880,7 +890,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode >= ALLOW_8X8 && #if CONFIG_AB4X4 - (mbmi->sb_type >= BLOCK_SIZE_SB8X8)) + (bsize >= BLOCK_SIZE_SB8X8)) #else (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == I4X4_PRED) && !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV))) diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index b200e6ccceb43bfd1098dfed3bb9988b3f24a43b..e58c579796ca06e385fd0d826295131b1abf27f5 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -413,6 +413,11 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE_TYPE bsize) { MACROBLOCKD *const xd = &pbi->mb; +#if CONFIG_AB4X4 + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index > 0) + return; +#endif set_offsets(pbi, bsize, mi_row, mi_col); vp9_decode_mb_mode_mv(pbi, xd, mi_row, mi_col, r); set_refs(pbi, mi_row, mi_col); @@ -465,6 +470,7 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, } subsize = get_subsize(bsize, partition); + *(get_sb_index(xd, subsize)) = 0; switch (partition) { case PARTITION_NONE: @@ -472,11 +478,13 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, break; case PARTITION_HORZ: decode_modes_b(pbi, mi_row, mi_col, r, subsize); + *(get_sb_index(xd, subsize)) = 1; if (mi_row + bs < pc->mi_rows) decode_modes_b(pbi, mi_row + bs, mi_col, r, subsize); break; case PARTITION_VERT: decode_modes_b(pbi, mi_row, mi_col, r, subsize); + *(get_sb_index(xd, subsize)) = 1; if (mi_col + bs < pc->mi_cols) decode_modes_b(pbi, mi_row, mi_col + bs, r, subsize); break; diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index 6a55e8fb8157e90134ab4b993ee4e7818c8e4695..9761bd6f8d2066d3003ae5185be53df0e7aaabf4 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -34,7 +34,7 @@ static void recon_write_yuv_frame(const char *name, const YV12_BUFFER_CONFIG *s, int w, int _h) { - FILE *yuv_file = fopen((char *)name, "ab"); + FILE *yuv_file = fopen(name, "ab"); const uint8_t *src = s->y_buffer; int h = _h; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index bcec13c4bedcafe87ad40e0026169bfe362a9207..b09da88e4706390fb4fda9d4698391c4471cf092 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -696,39 +696,50 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, nmvc, xd->allow_high_precision_mv); break; case SPLITMV: { - int j = 0; - - do { - B_PREDICTION_MODE blockmode; - int_mv blockmv; - int k = -1; /* first block in subset j */ - int mv_contz; - int_mv leftmv, abovemv; - - blockmode = cpi->mb.partition_info->bmi[j].mode; - blockmv = cpi->mb.partition_info->bmi[j].mv; - k = j; - leftmv.as_int = left_block_mv(xd, m, k); - abovemv.as_int = above_block_mv(m, k, mis); - mv_contz = vp9_mv_cont(&leftmv, &abovemv); - - write_sub_mv_ref(bc, blockmode, - cpi->common.fc.sub_mv_ref_prob[mv_contz]); - cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++; - if (blockmode == NEW4X4) { + int j; + B_PREDICTION_MODE blockmode; + int_mv blockmv; + int k = -1; /* first block in subset j */ + int mv_contz; + int_mv leftmv, abovemv; + int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl; + int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl; + int idx, idy; +#if !CONFIG_AB4X4 + bw = 1, bh = 1; +#endif + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + j = idy * 2 + idx; + blockmode = cpi->mb.partition_info->bmi[j].mode; + blockmv = cpi->mb.partition_info->bmi[j].mv; + k = j; + leftmv.as_int = left_block_mv(xd, m, k); + abovemv.as_int = above_block_mv(m, k, mis); + mv_contz = vp9_mv_cont(&leftmv, &abovemv); + + write_sub_mv_ref(bc, blockmode, + cpi->common.fc.sub_mv_ref_prob[mv_contz]); + cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++; + if (blockmode == NEW4X4) { #ifdef ENTROPY_STATS - active_section = 11; + active_section = 11; #endif - vp9_encode_mv(bc, &blockmv.as_mv, &mi->best_mv.as_mv, - nmvc, xd->allow_high_precision_mv); - - if (mi->second_ref_frame > 0) - vp9_encode_mv(bc, - &cpi->mb.partition_info->bmi[j].second_mv.as_mv, - &mi->best_second_mv.as_mv, + vp9_encode_mv(bc, &blockmv.as_mv, &mi->best_mv.as_mv, nmvc, xd->allow_high_precision_mv); + + if (mi->second_ref_frame > 0) + vp9_encode_mv(bc, + &cpi->mb.partition_info->bmi[j].second_mv.as_mv, + &mi->best_second_mv.as_mv, + nmvc, xd->allow_high_precision_mv); + } } - } while (++j < cpi->mb.partition_info->count); + } + +#ifdef MODE_STATS + ++count_mb_seg[mi->partitioning]; +#endif break; } default: @@ -837,6 +848,11 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; +#if CONFIG_AB4X4 + if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) + if (xd->ab_index > 0) + return; +#endif xd->mode_info_context = m; set_mi_row_col(&cpi->common, xd, mi_row, 1 << mi_height_log2(m->mbmi.sb_type), @@ -891,7 +907,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, #if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) - if (xd->ab_index != 0) + if (xd->ab_index > 0) return; #endif @@ -910,6 +926,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, } subsize = get_subsize(bsize, partition); + *(get_sb_index(xd, subsize)) = 0; switch (partition) { case PARTITION_NONE: @@ -917,11 +934,13 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, break; case PARTITION_HORZ: write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); + *(get_sb_index(xd, subsize)) = 1; if ((mi_row + bs) < cm->mi_rows) write_modes_b(cpi, m + bs * mis, bc, tok, tok_end, mi_row + bs, mi_col); break; case PARTITION_VERT: write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); + *(get_sb_index(xd, subsize)) = 1; if ((mi_col + bs) < cm->mi_cols) write_modes_b(cpi, m + bs, bc, tok, tok_end, mi_row, mi_col + bs); break; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 27f693d64585cb119a573310ba4dd7137a7c272e..3e108c8fcfd9e4d96855d6d10bc9f9f372877da9 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -786,6 +786,12 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, if (sub_index != -1) *(get_sb_index(xd, bsize)) = sub_index; + +#if CONFIG_AB4X4 + if (bsize < BLOCK_SIZE_SB8X8) + if (xd->ab_index > 0) + return; +#endif set_offsets(cpi, mi_row, mi_col, bsize); update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); @@ -828,13 +834,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, if (bsl == bwl && bsl == bhl) { #if CONFIG_AB4X4 - if (output_enabled && bsize >= BLOCK_SIZE_SB8X8) { - if (bsize > BLOCK_SIZE_SB8X8 || - (bsize == BLOCK_SIZE_SB8X8 && c1 == bsize)) + if (output_enabled && bsize >= BLOCK_SIZE_SB8X8) cpi->partition_count[pl][PARTITION_NONE]++; - else - cpi->partition_count[pl][PARTITION_SPLIT]++; - } #else if (output_enabled && bsize > BLOCK_SIZE_SB8X8) cpi->partition_count[pl][PARTITION_NONE]++; @@ -909,7 +910,6 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, return; } #endif - assert(mi_height_log2(bsize) == mi_width_log2(bsize)); // buffer the above/left context information of the block in search. @@ -939,7 +939,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, for (i = 0; i < 4; ++i) { int x_idx = (i & 1) * (ms >> 1); int y_idx = (i >> 1) * (ms >> 1); - int r, d; + int r = 0, d = 0; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; @@ -966,10 +966,13 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } - // TODO(jingning): need to enable 4x8 and 8x4 partition coding // PARTITION_HORZ if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) && +#if CONFIG_AB4X4 + (bsize >= BLOCK_SIZE_SB8X8)) { +#else (bsize >= BLOCK_SIZE_MB16X16)) { +#endif int r2, d2; int mb_skip = 0; subsize = get_subsize(bsize, PARTITION_HORZ); @@ -978,7 +981,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, get_block_context(x, subsize)); if (mi_row + ms <= cm->mi_rows) { - int r, d; + int r = 0, d = 0; update_state(cpi, get_block_context(x, subsize), subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *(get_sb_index(xd, subsize)) = 1; @@ -992,8 +995,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); +#if CONFIG_AB4X4 + if (r2 < INT_MAX) + r2 += x->partition_cost[pl][PARTITION_HORZ]; +#else r2 += x->partition_cost[pl][PARTITION_HORZ]; - +#endif if ((RDCOST(x->rdmult, x->rddiv, r2, d2) < RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) { srate = r2; @@ -1005,7 +1012,11 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, // PARTITION_VERT if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) && +#if CONFIG_AB4X4 + (bsize >= BLOCK_SIZE_SB8X8)) { +#else (bsize >= BLOCK_SIZE_MB16X16)) { +#endif int r2, d2; int mb_skip = 0; subsize = get_subsize(bsize, PARTITION_VERT); @@ -1013,7 +1024,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, get_block_context(x, subsize)); if (mi_col + ms <= cm->mi_cols) { - int r, d; + int r = 0, d = 0; update_state(cpi, get_block_context(x, subsize), subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *(get_sb_index(xd, subsize)) = 1; @@ -1027,8 +1038,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, } set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); +#if CONFIG_AB4X4 + if (r2 < INT_MAX) + r2 += x->partition_cost[pl][PARTITION_VERT]; +#else r2 += x->partition_cost[pl][PARTITION_VERT]; - +#endif if ((RDCOST(x->rdmult, x->rddiv, r2, d2) < RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) { srate = r2; diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index d180e46cf88015103a8fa036e6168f1ca5e51276..e2cd8838c31930f8847af13c7b29e404edeb3c62 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -573,7 +573,11 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl; int idx, idy; +#if CONFIG_AB4X4 + if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { +#else if (mbmi->mode == SPLITMV) { +#endif int i; PARTITION_INFO *pi = x->partition_info; #if !CONFIG_AB4X4 diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f928e7afe86be512c8b5a585675e45c580076340..ff437c18e99a34538616acf6858bb436426803d0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -910,6 +910,11 @@ static int labels2mode(MACROBLOCK *x, MB_MODE_INFO * mbmi = &mic->mbmi; const int mis = xd->mode_info_stride; int i, cost = 0, thismvcost = 0; +#if CONFIG_AB4X4 + int idx, idy; + int bw = 1 << b_width_log2(mbmi->sb_type); + int bh = 1 << b_height_log2(mbmi->sb_type); +#endif /* We have to be careful retrieving previously-encoded motion vectors. Ones from this macroblock have to be pulled from the BLOCKD array @@ -993,6 +998,17 @@ static int labels2mode(MACROBLOCK *x, x->partition_info->bmi[i].mv.as_int = this_mv->as_int; if (mbmi->second_ref_frame > 0) x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; +#if CONFIG_AB4X4 + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + vpx_memcpy(&mic->bmi[i + idy * 2 + idx], + &mic->bmi[i], sizeof(mic->bmi[i])); + vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx], + &x->partition_info->bmi[i], + sizeof(x->partition_info->bmi[i])); + } + } +#endif } cost += thismvcost; @@ -1007,8 +1023,15 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, int *distortion, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl) { - int i; + int i, k; MACROBLOCKD *xd = &x->e_mbd; + BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + int bwl = b_width_log2(bsize), bw = 1 << bwl; + int bhl = b_height_log2(bsize), bh = 1 << bhl; + int idx, idy; +#if !CONFIG_AB4X4 + bw = 1, bh = 1; +#endif *labelyrate = 0; *distortion = 0; @@ -1018,10 +1041,10 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, x->plane[0].src.buf, src_stride); - int16_t* const src_diff = + int16_t* src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); + int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, xd->plane[0].pre[0].buf, @@ -1030,7 +1053,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, xd->plane[0].dst.buf, xd->plane[0].dst.stride); - int thisdistortion; + int thisdistortion = 0; + int thisrate = 0; vp9_build_inter_predictor(pre, xd->plane[0].pre[0].stride, @@ -1038,7 +1062,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[0], &xd->scale_factor[0], - 4, 4, 0 /* no avg */, &xd->subpix); + 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix); // TODO(debargha): Make this work properly with the // implicit-compoundinter-weight experiment when implicit @@ -1051,22 +1075,33 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, dst, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[1], - &xd->scale_factor[1], 4, 4, 1, + &xd->scale_factor[1], 4 * bw, 4 * bh, 1, &xd->subpix); } - vp9_subtract_block(4, 4, src_diff, 8, + vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, src, src_stride, dst, xd->plane[0].dst.stride); - x->fwd_txm4x4(src_diff, coeff, 16); - x->quantize_b_4x4(x, i, DCT_DCT, 16); - thisdistortion = vp9_block_error(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, - i, 16), 16); + + k = i; + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + k += (idy * 2 + idx); + src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k, + x->plane[0].src_diff); + coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); + x->fwd_txm4x4(src_diff, coeff, 16); + x->quantize_b_4x4(x, k, DCT_DCT, 16); + thisdistortion += vp9_block_error(coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, + k, 16), 16); + thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, + ta + (k & 1), + tl + (k >> 1), TX_4X4, 16); + } + } *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC, - ta + (i & 1), - tl + (i >> 1), TX_4X4, 16); + *labelyrate += thisrate; } } *distortion >>= 2; @@ -1155,15 +1190,18 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, int sbr = 0, sbd = 0; int segmentyrate = 0; int best_eobs[4] = { 0 }; -#if CONFIG_AB4X4 BLOCK_SIZE_TYPE bsize = mbmi->sb_type; - int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); -#endif - + int bwl = b_width_log2(bsize), bw = 1 << bwl; + int bhl = b_height_log2(bsize), bh = 1 << bhl; + int idx, idy; vp9_variance_fn_ptr_t *v_fn_ptr; - ENTROPY_CONTEXT t_above[2], t_left[2]; - ENTROPY_CONTEXT t_above_b[2], t_left_b[2]; + ENTROPY_CONTEXT t_above[4], t_left[4]; + ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; + +#if !CONFIG_AB4X4 + bh = 1, bw = 1; +#endif vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); @@ -1181,183 +1219,367 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, label_mv_thresh = 1 * bsi->mvthresh / label_count; // Segmentation method overheads +#if !CONFIG_AB4X4 rate += vp9_cost_mv_ref(cpi, SPLITMV, mbmi->mb_mode_context[mbmi->ref_frame]); this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); br += rate; +#endif other_segment_rd = this_segment_rd; - for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { - int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; - int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; - B_PREDICTION_MODE mode_selected = ZERO4X4; - int bestlabelyrate = 0; - - // search for the best motion vector on this segment - for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { - int64_t this_rd; - int distortion; - int labelyrate; - ENTROPY_CONTEXT t_above_s[2], t_left_s[2]; - - vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); - vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); - - // motion search for newmv (single predictor case only) - if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) { - int sseshift, n; - int step_param = 0; - int further_steps; - int thissme, bestsme = INT_MAX; - const struct buf_2d orig_src = x->plane[0].src; - const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0]; - - /* Is the best so far sufficiently good that we cant justify doing - * and new motion search. */ - if (best_label_rd < label_mv_thresh) - break; + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + // TODO(jingning,rbultje): rewrite the rate-distortion optimization + // loop for 4x4/4x8/8x4 block coding +#if CONFIG_AB4X4 + int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; + int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; + B_PREDICTION_MODE mode_selected = ZERO4X4; + int bestlabelyrate = 0; + i = idy * 2 + idx; + + // search for the best motion vector on this segment + for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) { + int64_t this_rd; + int distortion; + int labelyrate; + ENTROPY_CONTEXT t_above_s[4], t_left_s[4]; + + vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); + vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); + + // motion search for newmv (single predictor case only) + if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) { + int sseshift, n; + int step_param = 0; + int further_steps; + int thissme, bestsme = INT_MAX; + const struct buf_2d orig_src = x->plane[0].src; + const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0]; + + /* Is the best so far sufficiently good that we cant justify doing + * and new motion search. */ + if (best_label_rd < label_mv_thresh) + break; - if (cpi->compressor_speed) { - // use previous block's result as next block's MV predictor. - if (i > 0) { - bsi->mvp.as_int = - x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; - if (i == 2) + if (cpi->compressor_speed) { + // use previous block's result as next block's MV predictor. + if (i > 0) { bsi->mvp.as_int = - x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; - step_param = 2; + x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; + if (i == 2) + bsi->mvp.as_int = + x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; + step_param = 2; + } } - } - further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - - { - int sadpb = x->sadperbit4; - int_mv mvp_full; - - mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; - mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; - - // find first label - n = i; - - // adjust src pointer for this segment - x->plane[0].src.buf = - raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, - x->plane[0].src.buf, - x->plane[0].src.stride); - assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); - x->e_mbd.plane[0].pre[0].buf = - raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, - x->e_mbd.plane[0].pre[0].buf, - x->e_mbd.plane[0].pre[0].stride); - - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 0, v_fn_ptr, - bsi->ref_mv, &mode_mv[NEW4X4]); - - sseshift = 0; - - // Should we do a full search (best quality only) - if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { - /* Check if mvp_full is within the range. */ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, - x->mv_row_min, x->mv_row_max); - - thissme = cpi->full_search_sad(x, &mvp_full, - sadpb, 16, v_fn_ptr, - x->nmvjointcost, x->mvcost, - bsi->ref_mv, - n); - - if (thissme < bestsme) { - bestsme = thissme; - mode_mv[NEW4X4].as_int = - x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; - } else { - /* The full search result is actually worse so re-instate the - * previous best vector */ - x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = - mode_mv[NEW4X4].as_int; + further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; + + { + int sadpb = x->sadperbit4; + int_mv mvp_full; + + mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; + mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; + + // find first label + n = i; + + // adjust src pointer for this segment + x->plane[0].src.buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->plane[0].src.buf, + x->plane[0].src.stride); + assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); + x->e_mbd.plane[0].pre[0].buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->e_mbd.plane[0].pre[0].buf, + x->e_mbd.plane[0].pre[0].stride); + + bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 0, v_fn_ptr, + bsi->ref_mv, &mode_mv[NEW4X4]); + + sseshift = 0; + + // Should we do a full search (best quality only) + if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { + /* Check if mvp_full is within the range. */ + clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); + + thissme = cpi->full_search_sad(x, &mvp_full, + sadpb, 16, v_fn_ptr, + x->nmvjointcost, x->mvcost, + bsi->ref_mv, + n); + + if (thissme < bestsme) { + bestsme = thissme; + mode_mv[NEW4X4].as_int = + x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; + } else { + /* The full search result is actually worse so re-instate the + * previous best vector */ + x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = + mode_mv[NEW4X4].as_int; + } } } - } - if (bestsme < INT_MAX) { - int distortion; - unsigned int sse; - cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit, v_fn_ptr, - x->nmvjointcost, x->mvcost, - &distortion, &sse); + if (bestsme < INT_MAX) { + int distortion; + unsigned int sse; + cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4], + bsi->ref_mv, x->errorperbit, v_fn_ptr, + x->nmvjointcost, x->mvcost, + &distortion, &sse); - // safe motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; + // safe motion search result for use in compound prediction + seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; + } + + // restore src pointers + x->plane[0].src = orig_src; + x->e_mbd.plane[0].pre[0] = orig_pre; + } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) { + /* NEW4X4 */ + /* motion search not completed? Then skip newmv for this block with + * comppred */ + if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || + seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { + continue; + } } - // restore src pointers - x->plane[0].src = orig_src; - x->e_mbd.plane[0].pre[0] = orig_pre; - } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) { - /* NEW4X4 */ - /* motion search not completed? Then skip newmv for this block with - * comppred */ - if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || - seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { + rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], + &second_mode_mv[this_mode], seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); + + // Trap vectors that reach beyond the UMV borders + if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || + ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || + ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || + ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { continue; } - } + if (mbmi->second_ref_frame > 0 && + mv_check_bounds(x, &second_mode_mv[this_mode])) + continue; - rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], - &second_mode_mv[this_mode], seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, - x->mvcost, cpi); + this_rd = encode_inter_mb_segment(&cpi->common, + x, labels, i, &labelyrate, + &distortion, t_above_s, t_left_s); + this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); + rate += labelyrate; + + if (this_rd < best_label_rd) { + sbr = rate; + sbd = distortion; + bestlabelyrate = labelyrate; + mode_selected = this_mode; + best_label_rd = this_rd; + for (j = 0; j < 4; j++) + if (labels[j] == i) + best_eobs[j] = x->e_mbd.plane[0].eobs[j]; + + vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); + vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); + } + } /*for each 4x4 mode*/ - // Trap vectors that reach beyond the UMV borders - if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || - ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || - ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { - continue; - } - if (mbmi->second_ref_frame > 0 && - mv_check_bounds(x, &second_mode_mv[this_mode])) - continue; + vpx_memcpy(t_above, t_above_b, sizeof(t_above)); + vpx_memcpy(t_left, t_left_b, sizeof(t_left)); - this_rd = encode_inter_mb_segment(&cpi->common, - x, labels, i, &labelyrate, - &distortion, t_above_s, t_left_s); - this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); - rate += labelyrate; - - if (this_rd < best_label_rd) { - sbr = rate; - sbd = distortion; - bestlabelyrate = labelyrate; - mode_selected = this_mode; - best_label_rd = this_rd; - for (j = 0; j < 4; j++) - if (labels[j] == i) - best_eobs[j] = x->e_mbd.plane[0].eobs[j]; - - vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); - vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); - } - } /*for each 4x4 mode*/ + labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], + &second_mode_mv[mode_selected], seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); +#else + int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; + int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; + B_PREDICTION_MODE mode_selected = ZERO4X4; + int bestlabelyrate = 0; + i = idy * 2 + idx; + + // search for the best motion vector on this segment + for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { + int64_t this_rd; + int distortion; + int labelyrate; + ENTROPY_CONTEXT t_above_s[2], t_left_s[2]; + + vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); + vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); + + // motion search for newmv (single predictor case only) + if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) { + int sseshift, n; + int step_param = 0; + int further_steps; + int thissme, bestsme = INT_MAX; + const struct buf_2d orig_src = x->plane[0].src; + const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0]; + + /* Is the best so far sufficiently good that we cant justify doing + * and new motion search. */ + if (best_label_rd < label_mv_thresh) + break; - vpx_memcpy(t_above, t_above_b, sizeof(t_above)); - vpx_memcpy(t_left, t_left_b, sizeof(t_left)); + if (cpi->compressor_speed) { + // use previous block's result as next block's MV predictor. + if (i > 0) { + bsi->mvp.as_int = + x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; + if (i == 2) + bsi->mvp.as_int = + x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; + step_param = 2; + } + } + + further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; + + { + int sadpb = x->sadperbit4; + int_mv mvp_full; + + mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; + mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; + + // find first label + n = i; + + // adjust src pointer for this segment + x->plane[0].src.buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->plane[0].src.buf, + x->plane[0].src.stride); + assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); + x->e_mbd.plane[0].pre[0].buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->e_mbd.plane[0].pre[0].buf, + x->e_mbd.plane[0].pre[0].stride); - labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], - &second_mode_mv[mode_selected], seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, - x->mvcost, cpi); + bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 0, v_fn_ptr, + bsi->ref_mv, &mode_mv[NEW4X4]); + + sseshift = 0; + + // Should we do a full search (best quality only) + if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { + /* Check if mvp_full is within the range. */ + clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); + + thissme = cpi->full_search_sad(x, &mvp_full, + sadpb, 16, v_fn_ptr, + x->nmvjointcost, x->mvcost, + bsi->ref_mv, + n); + + if (thissme < bestsme) { + bestsme = thissme; + mode_mv[NEW4X4].as_int = + x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; + } else { + /* The full search result is actually worse so re-instate the + * previous best vector */ + x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = + mode_mv[NEW4X4].as_int; + } + } + } + + if (bestsme < INT_MAX) { + int distortion; + unsigned int sse; + cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4], + bsi->ref_mv, x->errorperbit, v_fn_ptr, + x->nmvjointcost, x->mvcost, + &distortion, &sse); + + // safe motion search result for use in compound prediction + seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; + } + + // restore src pointers + x->plane[0].src = orig_src; + x->e_mbd.plane[0].pre[0] = orig_pre; + } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) { + /* NEW4X4 */ + /* motion search not completed? Then skip newmv for this block with + * comppred */ + if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || + seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { + continue; + } + } + + rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], + &second_mode_mv[this_mode], seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); + + // Trap vectors that reach beyond the UMV borders + if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || + ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || + ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || + ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { + continue; + } + if (mbmi->second_ref_frame > 0 && + mv_check_bounds(x, &second_mode_mv[this_mode])) + continue; - br += sbr; - bd += sbd; - segmentyrate += bestlabelyrate; - this_segment_rd += best_label_rd; - other_segment_rd += best_other_rd; + this_rd = encode_inter_mb_segment(&cpi->common, + x, labels, i, &labelyrate, + &distortion, t_above_s, t_left_s); + this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); + rate += labelyrate; + + if (this_rd < best_label_rd) { + sbr = rate; + sbd = distortion; + bestlabelyrate = labelyrate; + mode_selected = this_mode; + best_label_rd = this_rd; + for (j = 0; j < 4; j++) + if (labels[j] == i) + best_eobs[j] = x->e_mbd.plane[0].eobs[j]; + + vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); + vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); + } + } /*for each 4x4 mode*/ + + vpx_memcpy(t_above, t_above_b, sizeof(t_above)); + vpx_memcpy(t_left, t_left_b, sizeof(t_left)); + + labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], + &second_mode_mv[mode_selected], seg_mvs[i], + bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, + x->mvcost, cpi); +#endif + + br += sbr; + bd += sbd; + segmentyrate += bestlabelyrate; + this_segment_rd += best_label_rd; + other_segment_rd += best_other_rd; + + for (j = 1; j < bh; ++j) + vpx_memcpy(&x->partition_info->bmi[i + j * 2], + &x->partition_info->bmi[i], + sizeof(x->partition_info->bmi[i])); + for (j = 1; j < bw; ++j) + vpx_memcpy(&x->partition_info->bmi[i + j], + &x->partition_info->bmi[i], + sizeof(x->partition_info->bmi[i])); + } } /* for each label */ if (this_segment_rd < bsi->segment_rd) { @@ -2504,12 +2726,23 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable; int64_t txfm_cache[NB_TXFM_MODES]; + int i; + + for (i = 0; i < NB_TXFM_MODES; ++i) + txfm_cache[i] = INT64_MAX; // Test best rd so far against threshold for trying this mode. +#if CONFIG_AB4X4 + if (bsize >= BLOCK_SIZE_SB8X8 && + (best_rd < cpi->rd_threshes[mode_index] || + cpi->rd_threshes[mode_index] == INT_MAX)) + continue; +#else if (best_rd <= cpi->rd_threshes[mode_index] || cpi->rd_threshes[mode_index] == INT_MAX) { continue; } +#endif x->skip = 0; this_mode = vp9_mode_order[mode_index].mode; @@ -2520,7 +2753,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } +#if CONFIG_AB4X4 + if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) { +#else if (cpi->speed > 0) { +#endif if (!(ref_frame_mask & (1 << ref_frame))) { continue; } @@ -2652,6 +2889,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += dist_uv[TX_4X4]; distortion_uv = dist_uv[TX_4X4]; mbmi->uv_mode = mode_uv[TX_4X4]; +#if CONFIG_AB4X4 + txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + for (i = 0; i < NB_TXFM_MODES; ++i) + txfm_cache[i] = txfm_cache[ONLY_4X4]; +#endif } else if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; vp9_build_intra_predictors_sby_s(xd, bsize); @@ -2785,6 +3027,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_uv; skippable = skippable && uv_skippable; +#if CONFIG_AB4X4 + txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + for (i = 0; i < NB_TXFM_MODES; ++i) + txfm_cache[i] = txfm_cache[ONLY_4X4]; +#endif + if (!mode_excluded) { if (is_comp_pred) mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; @@ -2855,7 +3103,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Is Mb level skip allowed (i.e. not coded at segment level). mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); +#if CONFIG_AB4X4 + if (skippable && bsize >= BLOCK_SIZE_SB8X8) { +#else if (skippable) { +#endif // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // for best_yrd calculation @@ -3001,12 +3253,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd; + int64_t adj_rd = INT64_MAX; if (this_mode != I4X4_PRED) { adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode]; } else { adj_rd = this_rd; } + if (adj_rd < best_txfm_rd[i]) best_txfm_rd[i] = adj_rd; } @@ -3073,7 +3326,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0) && - (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { + (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME) +#if CONFIG_AB4X4 + && bsize >= BLOCK_SIZE_SB8X8 +#endif + ) { mbmi->mode = ZEROMV; mbmi->ref_frame = ALTREF_FRAME; mbmi->second_ref_frame = NONE;