diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 448c1c4102cc6be8aff4d0797350f9211d65339a..a0e7b37da033c4a694bb2cb1ec2910f48077d9e4 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -736,18 +736,18 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->quarter_pixel_search = 1; sf->half_pixel_search = 1; sf->iterative_sub_pixel = 1; - sf->no_skip_block4x4_search = 1; sf->optimize_coefficients = !cpi->oxcf.lossless; sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; sf->comp_inter_joint_search_thresh = BLOCK_SIZE_AB4X4; + sf->adpative_rd_thresh = 0; + #if CONFIG_MULTIPLE_ARF // Switch segmentation off. sf->static_segmentation = 0; #else sf->static_segmentation = 0; #endif - sf->mb16_breakout = 0; switch (mode) { case 0: // best quality mode @@ -761,13 +761,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) { #else sf->static_segmentation = 0; #endif - sf->mb16_breakout = 0; sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8; - + sf->adpative_rd_thresh = 1; if (speed > 0) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; sf->optimize_coefficients = 0; - sf->no_skip_block4x4_search = 0; sf->first_step = 1; } break; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 670f41f3b648ff68c066be83129c85fa7d5c027d..de9d18c167ddbfb2943383ee3d0e89f12fb4ef83 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -213,11 +213,10 @@ typedef struct { int max_step_search_steps; int first_step; int optimize_coefficients; - int no_skip_block4x4_search; int search_best_filter; - int mb16_breakout; int static_segmentation; int comp_inter_joint_search_thresh; + int adpative_rd_thresh; } SPEED_FEATURES; enum BlockSize { @@ -320,8 +319,10 @@ typedef struct VP9_COMP { unsigned int mode_chosen_counts[MAX_MODES]; int rd_thresh_mult[MAX_MODES]; - int rd_baseline_thresh[MAX_MODES]; - int rd_threshes[MAX_MODES]; + int rd_baseline_thresh[BLOCK_SIZE_TYPES][MAX_MODES]; + int 
rd_threshes[BLOCK_SIZE_TYPES][MAX_MODES]; + int rd_thresh_freq_fact[BLOCK_SIZE_TYPES][MAX_MODES]; + int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES]; int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES]; int comp_pred_count[COMP_PRED_CONTEXTS]; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 60badadddb16c364a4a6b70ab2dbc4c380f68f98..1dde0de35245fd9303d76805291347ff7b19b5e9 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -111,6 +111,17 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME}, }; +// The baseline rd thresholds for breaking out of the rd loop for +// certain modes are assumed to be based on 8x8 blocks. +// This table is used to correct for block size. +// The factors here are << 2 (2 = x0.5, 32 = x8 etc). +static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] = + {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; + +#define BASE_RD_THRESH_FREQ_FACT 16 +#define MAX_RD_THRESH_FREQ_FACT 32 +#define MAX_RD_THRESH_FREQ_INC 1 + static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES], vp9_coeff_count (*cnoskip)[BLOCK_TYPES], vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { @@ -175,7 +186,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { - int q, i; + int q, i, bsize; vp9_clear_system_state(); // __asm emms; @@ -207,24 +218,43 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { cpi->RDDIV = 1; cpi->RDMULT /= 100; - for (i = 0; i < MAX_MODES; i++) { - if (cpi->sf.thresh_mult[i] < INT_MAX) { - cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; - } else { - cpi->rd_threshes[i] = INT_MAX; + for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) { + for (i = 0; i < MAX_MODES; ++i) { + // Threshold here seems unnecessarily harsh but fine given actual + // range of values used for cpi->sf.thresh_mult[] + int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); + + // *4 relates to the scaling 
of rd_thresh_block_size_factor[] + if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) { + cpi->rd_threshes[bsize][i] = + cpi->sf.thresh_mult[i] * q * + rd_thresh_block_size_factor[bsize] / (4 * 100); + } else { + cpi->rd_threshes[bsize][i] = INT_MAX; + } + cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i]; + cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT; } - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; } } else { cpi->RDDIV = 100; - for (i = 0; i < MAX_MODES; i++) { - if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) { - cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; - } else { - cpi->rd_threshes[i] = INT_MAX; + for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) { + for (i = 0; i < MAX_MODES; i++) { + // Threshold here seems unnecessarily harsh but fine given actual + // range of values used for cpi->sf.thresh_mult[] + int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); + + if (cpi->sf.thresh_mult[i] < thresh_max) { + cpi->rd_threshes[bsize][i] = + cpi->sf.thresh_mult[i] * q * + rd_thresh_block_size_factor[bsize] / 4; + } else { + cpi->rd_threshes[bsize][i] = INT_MAX; + } + cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i]; + cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT; } - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; } } @@ -2619,9 +2649,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, txfm_cache[i] = INT64_MAX; // Test best rd so far against threshold for trying this mode. - if (bsize >= BLOCK_SIZE_SB8X8 && - (best_rd < cpi->rd_threshes[mode_index] || - cpi->rd_threshes[mode_index] == INT_MAX)) + if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] * + cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) || + cpi->rd_threshes[bsize][mode_index] == INT_MAX) continue; x->skip = 0; @@ -2812,9 +2842,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int uv_skippable; this_rd_thresh = (mbmi->ref_frame == LAST_FRAME) ? 
- cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA]; + cpi->rd_threshes[bsize][THR_NEWMV] : + cpi->rd_threshes[bsize][THR_NEWA]; this_rd_thresh = (mbmi->ref_frame == GOLDEN_FRAME) ? - cpi->rd_threshes[THR_NEWG] : this_rd_thresh; + cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh; xd->mode_info_context->mbmi.txfm_size = TX_4X4; for (switchable_filter_index = 0; @@ -3155,7 +3186,27 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (is_inter_mode(best_mode)) ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]]; - // TODO(rbultje) integrate with RD thresholding + // Updating rd_thresh_freq_fact[] here means that the different + // partition/block sizes are handled independently based on the best + // choice for the current partition. It may well be better to keep a scaled + // best rd so far value and update rd_thresh_freq_fact based on the mode/size + // combination that wins out. + if (cpi->sf.adpative_rd_thresh) { + for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { + if (mode_index == best_mode_index) { + cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT; + } else { + cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC; + if (cpi->rd_thresh_freq_fact[bsize][mode_index] > + (cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) { + cpi->rd_thresh_freq_fact[bsize][mode_index] = + cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT; + } + } + } + } + + // TODO(rbultje) integrate with RD thresholding #if 0 // Reduce the activation RD thresholds for the best choice mode if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&