Commit d78dbff0 authored by Deb Mukherjee

Subpel search cleanups and enhancements

- Some fixes to surface fit.
- Returns the variance function result, rather than the SAD, as the
  cost in the pattern search and diamond search functions. Only the
  vp9_pattern_search_sad function, used in the bigdia search,
  uses the SAD for the integer 1-away costs.
- Deploys SUBPEL_TREE_PRUNED_MORE for speed 4+.

Results:
derf [Speed 3]: About +0.036% in coding efficiency without any
discernible speed loss.
derf [Speed 4]: About 2-3% faster at -0.199% loss in coding efficiency.
derf [Speed 5]: About 3-4% faster at -0.149% loss in coding efficiency.

Change-Id: I8462f94f6adb46966ca964f2bd0400977357fd63
parent cfc337aa
......@@ -531,8 +531,8 @@ static INLINE int get_chessboard_index(const int frame_index) {
return frame_index & 0x1;
}
static INLINE int *cond_sad_list(const struct VP9_COMP *cpi, int *sad_list) {
return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL;
static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) {
return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL;
}
#ifdef __cplusplus
......
......@@ -34,7 +34,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
const int tmp_row_min = x->mv_row_min;
const int tmp_row_max = x->mv_row_max;
MV ref_full;
int sad_list[5];
int cost_list[5];
// Further step/diamond searches as necessary
int step_param = mv_sf->reduce_first_step_size;
......@@ -47,7 +47,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
/*cpi->sf.search_method == HEX*/
vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
&v_fn_ptr, 0, ref_mv, dst_mv);
// Try sub-pixel MC
......@@ -58,7 +58,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
NULL, NULL,
&distortion, &sse, NULL, 0, 0);
}
......
This diff is collapsed.
......@@ -80,7 +80,7 @@ typedef int (integer_mv_pattern_search_fn) (
int search_param,
int error_per_bit,
int do_init_search,
int *sad_list,
int *cost_list,
const vp9_variance_fn_ptr_t *vf,
int use_mvcost,
const MV *center_mv,
......@@ -100,7 +100,7 @@ typedef int (fractional_mv_step_fp) (
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step,
int *sad_list,
int *cost_list,
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
const uint8_t *second_pred,
......@@ -109,7 +109,7 @@ typedef int (fractional_mv_step_fp) (
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_surface_fit;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_evenmore;
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
const MV *ref_mv, int sad_per_bit,
......@@ -142,7 +142,7 @@ struct VP9_COMP;
int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full,
int step_param, int error_per_bit,
int *sad_list,
int *cost_list,
const MV *ref_mv, MV *tmp_mv,
int var_max, int rd);
......
......@@ -132,7 +132,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
const int tmp_row_min = x->mv_row_min;
const int tmp_row_max = x->mv_row_max;
int rv = 0;
int sad_list[5];
int cost_list[5];
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
if (cpi->common.show_frame &&
......@@ -160,7 +160,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.row >>= 3;
vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
&ref_mv, &tmp_mv->as_mv, INT_MAX, 0);
x->mv_col_min = tmp_col_min;
......@@ -187,7 +187,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
&cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref], NULL, 0, 0);
x->pred_mv[ref] = tmp_mv->as_mv;
......
......@@ -1605,7 +1605,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
int sadpb = x->sadperbit4;
MV mvp_full;
int max_mv;
int sad_list[5];
int cost_list[5];
/* Is the best so far sufficiently good that we can't justify doing
 * a new motion search? */
......@@ -1651,7 +1651,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
bestsme = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, sadpb,
cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL,
cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
&bsi->ref_mv[0]->as_mv, new_mv,
INT_MAX, 1);
......@@ -1665,7 +1665,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
sadpb, 16, &cpi->fn_ptr[bsize],
&bsi->ref_mv[0]->as_mv,
&best_mv->as_mv);
sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX;
cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
if (thissme < bestsme) {
bestsme = thissme;
*new_mv = best_mv->as_mv;
......@@ -1686,7 +1686,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->errorperbit, &cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&distortion,
&x->pred_sse[mbmi->ref_frame[0]],
......@@ -2036,7 +2036,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_col_max = x->mv_col_max;
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
int sad_list[5];
int cost_list[5];
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
......@@ -2108,7 +2108,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.row >>= 3;
bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
&ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
x->mv_col_min = tmp_col_min;
......@@ -2124,7 +2124,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
&cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref], NULL, 0, 0);
}
......
......@@ -124,7 +124,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->tx_size_search_method = USE_LARGESTALL;
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->mv.search_method = BIGDIA;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->adaptive_rd_thresh = 4;
sf->mode_search_skip_flags |= FLAG_EARLY_TERMINATE;
sf->disable_filter_search_var_thresh = 200;
......@@ -425,8 +425,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more;
} else if (sf->mv.subpel_search_method == SUBPEL_SURFACE_FIT) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_surface_fit;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore;
}
cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
......
......@@ -78,9 +78,9 @@ typedef enum {
typedef enum {
SUBPEL_TREE = 0,
SUBPEL_TREE_PRUNED = 1,
SUBPEL_TREE_PRUNED_MORE = 2,
SUBPEL_SURFACE_FIT = 3,
SUBPEL_TREE_PRUNED = 1, // Prunes 1/2-pel searches
SUBPEL_TREE_PRUNED_MORE = 2, // Prunes 1/2-pel searches more aggressively
SUBPEL_TREE_PRUNED_EVENMORE = 3, // Prunes 1/2- and 1/4-pel searches
// Other methods to come
} SUBPEL_SEARCH_METHODS;
......
......@@ -221,7 +221,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
int bestsme = INT_MAX;
int distortion;
unsigned int sse;
int sad_list[5];
int cost_list[5];
MV best_ref_mv1 = {0, 0};
MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
......@@ -245,7 +245,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
// Ignore mv costing by sending NULL pointer instead of cost arrays
vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
&cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv);
// Ignore mv costing by sending NULL pointer instead of cost array
......@@ -255,7 +255,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
0, mv_sf->subpel_iters_per_step,
cond_sad_list(cpi, sad_list),
cond_cost_list(cpi, cost_list),
NULL, NULL,
&distortion, &sse, NULL, 0, 0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment