From 6253cc9279652fd7550bc103f85bb45525ff20f1 Mon Sep 17 00:00:00 2001 From: Paul Wilkins <paulwilkins@google.com> Date: Tue, 1 Oct 2013 16:57:18 +0100 Subject: [PATCH] Speed setting review. Substantial reworking of the speed vs quality trade-offs for speed 1 and 2. In this patch I am attempting to freeze the "quality" meaning of speeds 1 and 2 relative to speed 0 so that in future we can better evaluate progress. I am targeting: Speed 1 quality ~-5% vs speed 0. Speed 2 quality ~-10% vs speed 0. It is inevitable that quality will still fluctuate a little as we adjust settings and add new features, but we will attempt to keep as close as possible to these values. Above speed 2 things will remain a bit more fluid for now. In this patch speed 1 is approximately 4-5x as fast as speed 0. This is similar to before but the quality hit is a lot less. Likewise speed 2 is approximately 2x as fast as speed 1 but is similar in quality to the previous speed 1 configuration. Also a slight change to behavior of FLAG_EARLY_TERMINATE to ensure all reference frames get at least one rd test. Important for very low variance regions. WIP: Added a new speed level with old speed 4 becoming speed 5. 
Speed 3 and 4 tradeoffs still WIP Change-Id: Ic7a38dd7b5b63ab1501f9352411972f480ac6264 --- vp9/encoder/vp9_encodeframe.c | 4 +- vp9/encoder/vp9_onyx_if.c | 107 +++++++++++++++++----------------- vp9/encoder/vp9_onyx_int.h | 10 +++- vp9/encoder/vp9_rdopt.c | 5 +- 4 files changed, 70 insertions(+), 56 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 0c1c30d681..957c57793f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1781,7 +1781,9 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, || cpi->common.show_frame == 0 || cpi->common.frame_type == KEY_FRAME || cpi->is_src_frame_alt_ref - || sb_has_motion(cpi, prev_mi_8x8)) { + || ((cpi->sf.use_lastframe_partitioning == + LAST_FRAME_PARTITION_LOW_MOTION) && + sb_has_motion(cpi, prev_mi_8x8))) { // If required set upper and lower partition size limits if (cpi->sf.auto_min_max_partition_size) { set_offsets(cpi, mi_row, mi_col, BLOCK_64X64); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index acad8e161a..f7d9869962 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -714,7 +714,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->max_step_search_steps = MAX_MVSEARCH_STEPS; sf->comp_inter_joint_search_thresh = BLOCK_4X4; sf->adaptive_rd_thresh = 0; - sf->use_lastframe_partitioning = 0; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; @@ -761,87 +761,88 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->static_segmentation = 0; #endif sf->use_avoid_tested_higherror = 1; - sf->adaptive_rd_thresh = MIN((speed + 1), 4); + sf->adaptive_rd_thresh = 1; if (speed == 1) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->less_rectangular_check = 1; + sf->use_square_partition_only = !(cpi->common.frame_type == KEY_FRAME || + cpi->common.intra_only); sf->tx_size_search_method = 
((cpi->common.frame_type == KEY_FRAME || - cpi->common.intra_only || - cpi->common.show_frame == 0) ? - USE_FULL_RD : - USE_LARGESTALL); + cpi->common.intra_only) + ? USE_FULL_RD : USE_LARGESTALL); + sf->disable_splitmv = + (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + } + if (speed == 2) { sf->use_square_partition_only = !(cpi->common.frame_type == KEY_FRAME || - cpi->common.intra_only || - cpi->common.show_frame == 0); + cpi->common.intra_only); + sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || + cpi->common.intra_only) + ? USE_FULL_RD : USE_LARGESTALL); sf->disable_splitmv = - (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; + (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; - sf->use_uv_intra_rd_estimate = 1; + sf->use_rd_breakout = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; sf->adaptive_motion_search = 1; sf->auto_mv_step_size = 1; - sf->auto_min_max_partition_size = 1; - // FIXME(jingning): temporarily turn off disable_split_var_thresh - // during refactoring process. will get this back after finishing - // the main framework of partition search type. 
- sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 16; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = 1; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; - sf->intra_y_mode_mask = INTRA_DC_TM_H_V; - sf->intra_uv_mode_mask = INTRA_DC_TM_H_V; - sf->use_fast_coef_updates = 1; + sf->adaptive_rd_thresh = 2; sf->mode_skip_start = 11; } - if (speed == 2) { - sf->less_rectangular_check = 1; + if (speed == 3) { sf->use_square_partition_only = 1; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->use_lastframe_partitioning = 1; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || - cpi->common.intra_only || - cpi->common.show_frame == 0) ? - USE_FULL_RD : - USE_LARGESTALL); + sf->tx_size_search_method = USE_LARGESTALL; + sf->disable_splitmv = + (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->intra_y_mode_mask = INTRA_DC_TM; - sf->intra_uv_mode_mask = INTRA_DC_TM; - sf->use_uv_intra_rd_estimate = 1; + FLAG_SKIP_INTRA_LOWVAR; + sf->use_rd_breakout = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; sf->adaptive_motion_search = 1; - sf->using_small_partition_info = 0; - sf->disable_splitmv = - (MIN(cpi->common.width, cpi->common.height) >= 720)? 
1 : 0; sf->auto_mv_step_size = 1; - sf->search_method = SQUARE; - sf->subpel_iters_per_step = 1; - sf->use_fast_lpf_pick = 1; + + sf->disable_filter_search_var_thresh = 16; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->auto_min_max_partition_size = 1; - sf->disable_split_var_thresh = 32; - sf->disable_filter_search_var_thresh = 32; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->use_lp32x32fdct = 1; + sf->subpel_iters_per_step = 1; sf->use_fast_coef_updates = 2; + + sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; } - if (speed == 3) { + if (speed == 4) { sf->less_rectangular_check = 1; sf->use_square_partition_only = 1; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->use_lastframe_partitioning = 1; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || @@ -872,9 +873,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_split_var_thresh = 64; sf->disable_filter_search_var_thresh = 64; sf->use_fast_coef_updates = 2; + sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; } - if (speed == 4) { + if (speed == 5) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->use_one_partition_size_always = 1; sf->always_this_block_size = BLOCK_16X16; @@ -904,6 +906,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->intra_y_mode_mask = INTRA_DC_ONLY; sf->intra_uv_mode_mask = INTRA_DC_ONLY; sf->use_fast_coef_updates = 2; + sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; } break; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 3dbeb98e2a..db8b484cc8 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -36,7 +36,7 @@ #define DISABLE_RC_LONG_TERM_MEM 0 #endif 
-// #define MODE_TEST_HIT_STATS +#define MODE_TEST_HIT_STATS // #define SPEEDSTATS 1 #if CONFIG_MULTIPLE_ARF @@ -229,6 +229,12 @@ typedef enum { #define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) #define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) +typedef enum { + LAST_FRAME_PARTITION_OFF = 0, + LAST_FRAME_PARTITION_LOW_MOTION = 1, + LAST_FRAME_PARTITION_ALL = 2 +} LAST_FRAME_PARTITION_METHOD; + typedef struct { int RD; SEARCH_METHODS search_method; @@ -246,7 +252,7 @@ typedef struct { int adaptive_rd_thresh; int skip_encode_sb; int skip_encode_frame; - int use_lastframe_partitioning; + LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; TX_SIZE_SEARCH_METHOD tx_size_search_method; int use_lp32x32fdct; int use_avoid_tested_higherror; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 83cd612261..96ed6f7952 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -52,6 +52,8 @@ DECLARE_ALIGNED(16, extern const uint8_t, #define GOLDEN_FRAME_MODE_MASK 0xFFB5A3BB0 #define ALT_REF_MODE_MASK 0xFF8C648D0 +#define MIN_EARLY_TERM_INDEX 3 + const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {RD_NEARESTMV, LAST_FRAME, NONE}, {RD_NEARESTMV, ALTREF_FRAME, NONE}, @@ -3851,7 +3853,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history - if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) { + if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) && + (mode_index > MIN_EARLY_TERM_INDEX)) { const int qstep = xd->plane[0].dequant[1]; // TODO(debargha): Enhance this by specializing for each mode_index int scale = 4; -- GitLab