diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 7b2dd113c874c6c3c0e829d3bcac7377178e34b9..013047e355dbb616b94897f35287978bf2e93735 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -50,6 +50,10 @@ typedef struct { int64_t tx_rd_diff[TX_MODES]; int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]; + // motion vector cache for adaptive motion search control in partition + // search loop + int_mv pred_mv[MAX_REF_FRAMES]; + // Bit flag for each mode whether it has high error in comparison to others. unsigned int modes_with_high_error; @@ -149,7 +153,7 @@ struct macroblock { // Used to store sub partition's choices. int fast_ms; - int_mv pred_mv; + int_mv pred_mv[MAX_REF_FRAMES]; int subblock_ref; // TODO(jingning): Need to refactor the structure arrays that buffers the diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 45bf87238f5b512d7630acb77da28ee1abe45c2e..c04953751a86f48e0e3dc777a89e6b5cce18ac57 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1316,7 +1316,6 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8, save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); x->fast_ms = 0; - x->pred_mv.as_int = 0; x->subblock_ref = 0; if (cpi->sf.adjust_partitioning_from_last_frame) { @@ -1710,10 +1709,6 @@ static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) { // Set fast motion search level. x->fast_ms = 1; - // Calculate prediction MV. - x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2; - x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2; - if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 && d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) { // Set fast motion search level. @@ -1729,6 +1724,14 @@ static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) { } } +static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { + vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); +} + +static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { + vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); +} + // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. @@ -1837,6 +1840,10 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } + // store estimated motion vector + if (cpi->sf.adaptive_motion_search) + store_pred_mv(x, get_block_context(x, bsize)); + // PARTITION_SPLIT sum_rd = 0; // TODO(jingning): use the motion vectors given by the above search as @@ -1851,7 +1858,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, continue; *get_sb_index(xd, subsize) = i; - + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, get_block_context(x, bsize)); rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, i != 3, best_rd - sum_rd); @@ -1885,7 +1893,6 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, } x->fast_ms = 0; - x->pred_mv.as_int = 0; x->subblock_ref = 0; if (partition_split_done && @@ -1897,6 +1904,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, if (partition_horz_allowed && do_rect) { subsize = get_subsize(bsize, PARTITION_HORZ); *get_sb_index(xd, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, get_block_context(x, bsize)); pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); @@ -1906,6 +1915,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(xd, subsize) = 1; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, get_block_context(x, bsize)); pick_sb_modes(cpi, mi_row + ms, mi_col, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); @@ -1937,6 +1948,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, subsize = get_subsize(bsize, PARTITION_VERT); *get_sb_index(xd, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, get_block_context(x, bsize)); pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); @@ -1945,6 +1958,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(xd, subsize) = 1; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, get_block_context(x, bsize)); pick_sb_modes(cpi, mi_row, mi_col + ms, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 7e429f661ea102921337cb25f2ae7862d3f16049..883b31e4eb1329ced31735f84eaa67ef8acffa0e 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -711,6 +711,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_lastframe_partitioning = 0; sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; + sf->adaptive_motion_search = 0; sf->use_avoid_tested_higherror = 0; sf->reference_masking = 0; sf->skip_lots_of_modes = 0; @@ -739,7 +740,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->using_small_partition_info = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - #if CONFIG_MULTIPLE_ARF // Switch segmentation off. sf->static_segmentation = 0; @@ -782,6 +782,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; sf->use_lp32x32fdct = 1; + sf->adaptive_motion_search = 1; sf->auto_mv_step_size = 1; sf->auto_min_max_partition_size = 1; @@ -821,6 +822,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; sf->use_lp32x32fdct = 1; + sf->adaptive_motion_search = 1; sf->using_small_partition_info = 0; sf->disable_splitmv = (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 66da026e3f26694b0a9a9e0bf978f98be7e7abad..3e5796f3ce47d51be0e119c95b1c79daeb3cfd8d 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -284,6 +284,8 @@ typedef struct { int last_partitioning_redo_frequency; int disable_splitmv; int using_small_partition_info; + // TODO(jingning): combine the related motion search speed features + int adaptive_motion_search; // Implements various heuristics to skip searching modes // The heuristics selected are based on flags diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 7a61d07514e27ae4243f9dbd4360dd88cae026b9..95f6d3ce8eeb3ed2fffa75e3f0c24bb1e0a08a43 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1771,6 +1771,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, max_mv = x->max_mv_context[mbmi->ref_frame[0]]; else max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; + if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and the best ref mvs of the current block for @@ -1781,11 +1782,16 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, step_param = cpi->mv_step_param; } - further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; + if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) { + mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3; + mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3; + step_param = MAX(step_param, 8); + } + + further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; // adjust src pointer for this block mi_buf_shift(x, i); if (cpi->sf.search_method == HEX) { @@ -1839,10 +1845,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, &distortion, &sse); - // safe motion search result for use in compound prediction + // save motion search result for use in compound prediction seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; } + if (cpi->sf.adaptive_motion_search) + x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; + // restore src pointers mi_buf_restore(x, orig_src, orig_pre); } @@ -2085,10 +2094,14 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; int row_offset, col_offset; + int num_mv_refs = MAX_MV_REF_CANDIDATES + + (cpi->sf.adaptive_motion_search && + cpi->common.show_frame && block_size < BLOCK_64X64); // Get the sad for each candidate reference mv - for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) { - this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int; + for (i = 0; i < num_mv_refs; i++) { + this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ? + mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int; max_mv = MAX(max_mv, MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); @@ -2349,7 +2362,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, step_param = 8; // Get prediction MV. - mvp_full.as_int = x->pred_mv.as_int; + mvp_full.as_int = x->pred_mv[ref].as_int; // Adjust MV sign if needed. if (cm->ref_frame_sign_bias[ref]) { @@ -2368,11 +2381,19 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } else { step_param = cpi->mv_step_param; } - // mvp_full.as_int = ref_mv[0].as_int; - mvp_full.as_int = - mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int; } + if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && + cpi->common.show_frame) { + int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize), + b_width_log2(bsize))); + step_param = MAX(step_param, boffset); + } + + mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ? + mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int : + x->pred_mv[ref].as_int; + mvp_full.as_mv.col >>= 3; mvp_full.as_mv.row >>= 3; @@ -2422,6 +2443,10 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, x->nmvjointcost, x->mvcost, 96); + + if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) + x->pred_mv[ref].as_int = tmp_mv->as_int; + if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++)