diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 014f54a49d5cd05144c03ea12323e52a2acedaa4..96e66f076ff4459c7dbb790d151f9d86bc44145b 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -245,6 +245,71 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { }, \ v = INT_MAX;) +#define FIRST_LEVEL_CHECKS \ + { \ + unsigned int left, right, up, down, diag; \ + CHECK_BETTER(left, tr, tc - hstep); \ + CHECK_BETTER(right, tr, tc + hstep); \ + CHECK_BETTER(up, tr - hstep, tc); \ + CHECK_BETTER(down, tr + hstep, tc); \ + whichdir = (left < right ? 0 : 1) + \ + (up < down ? 0 : 2); \ + switch (whichdir) { \ + case 0: \ + CHECK_BETTER(diag, tr - hstep, tc - hstep); \ + break; \ + case 1: \ + CHECK_BETTER(diag, tr - hstep, tc + hstep); \ + break; \ + case 2: \ + CHECK_BETTER(diag, tr + hstep, tc - hstep); \ + break; \ + case 3: \ + CHECK_BETTER(diag, tr + hstep, tc + hstep); \ + break; \ + } \ + } + +#define SECOND_LEVEL_CHECKS \ + { \ + int kr, kc; \ + unsigned int second; \ + if (tr != br && tc != bc) { \ + kr = br - tr; \ + kc = bc - tc; \ + CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ + CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ + } else if (tr == br && tc != bc) { \ + kc = bc - tc; \ + CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ + CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ + switch (whichdir) { \ + case 0: \ + case 1: \ + CHECK_BETTER(second, tr + hstep, tc + kc); \ + break; \ + case 2: \ + case 3: \ + CHECK_BETTER(second, tr - hstep, tc + kc); \ + break; \ + } \ + } else if (tr != br && tc == bc) { \ + kr = br - tr; \ + CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ + CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ + switch (whichdir) { \ + case 0: \ + case 2: \ + CHECK_BETTER(second, tr + kr, tc + hstep); \ + break; \ + case 1: \ + case 3: \ + CHECK_BETTER(second, tr + kr, tc - hstep); \ + break; \ + } \ + } \ + } + int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, @@ -261,7 +326,6 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, int rr, rc, br, bc, hstep; int tr, tc; unsigned int besterr = INT_MAX; - unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; unsigned int halfiters = iters_per_step; @@ -306,32 +370,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, // common with the last iteration could be 2 ( if diag selected) while (halfiters--) { // 1/2 pel - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - + FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; - tr = br; tc = bc; } @@ -343,32 +385,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, if (forced_stop != 2) { hstep >>= 1; while (quarteriters--) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - + FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; - tr = br; tc = bc; } @@ -378,32 +398,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, forced_stop == 0) { hstep >>= 1; while (eighthiters--) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - + FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; - tr = br; tc = bc; } @@ -419,6 +417,105 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, return besterr; } +int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1) { + uint8_t *z = x->plane[0].src.buf; + int src_stride = x->plane[0].src.stride; + MACROBLOCKD *xd = &x->e_mbd; + int rr, rc, br, bc, hstep; + int tr, tc; + unsigned int besterr = INT_MAX; + unsigned int sse; + unsigned int whichdir; + int thismse; + int maxc, minc, maxr, minr; + int y_stride; + int offset; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; + + uint8_t *y = xd->plane[0].pre[0].buf + + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + + bestmv->as_mv.col; + + y_stride = xd->plane[0].pre[0].stride; + + rr = ref_mv->as_mv.row; + rc = ref_mv->as_mv.col; + br = bestmv->as_mv.row << 3; + bc = bestmv->as_mv.col << 3; + hstep = 4; + minc = MAX(x->mv_col_min << 3, + (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); + maxc = MIN(x->mv_col_max << 3, + (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); + minr = MAX(x->mv_row_min << 3, + (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); + maxr = MIN(x->mv_row_max << 3, + (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); + + tr = br; + tc = bc; + + offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; + + // central mv + bestmv->as_mv.row <<= 3; + bestmv->as_mv.col <<= 3; + + // calculate central point error + besterr = vfp->vf(y, y_stride, z, src_stride, sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + + // 1/2 pel + FIRST_LEVEL_CHECKS; + if (halfiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (quarteriters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + forced_stop == 0) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (eighthiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + + bestmv->as_mv.row = br; + bestmv->as_mv.col = bc; + + if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) + return INT_MAX; + + return besterr; +} + #undef DIST /* returns subpixel variance error function */ #define DIST(r, c) \ @@ -443,7 +540,6 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, int rr, rc, br, bc, hstep; int tr, tc; unsigned int besterr = INT_MAX; - unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; unsigned int halfiters = iters_per_step; @@ -478,7 +574,6 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, tr = br; tc = bc; - offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; // central mv @@ -497,32 +592,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, // common with the last iteration could be 2 ( if diag selected) while (halfiters--) { // 1/2 pel - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - + FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; - tr = br; tc = bc; } @@ -534,32 +607,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, if (forced_stop != 2) { hstep >>= 1; while (quarteriters--) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - + FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; - tr = br; tc = bc; } @@ -569,32 +620,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, forced_stop == 0) { hstep >>= 1; while (eighthiters--) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - + FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; - tr = br; tc = bc; } @@ -609,6 +638,116 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, return besterr; } +int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + const uint8_t *second_pred, + int w, int h) { + uint8_t *z = x->plane[0].src.buf; + int src_stride = x->plane[0].src.stride; + MACROBLOCKD *xd = &x->e_mbd; + int rr, rc, br, bc, hstep; + int tr, tc; + unsigned int besterr = INT_MAX; + unsigned int sse; + unsigned int whichdir; + int thismse; + int maxc, minc, maxr, minr; + int y_stride; + int offset; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; + + DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); + uint8_t *y = xd->plane[0].pre[0].buf + + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + + bestmv->as_mv.col; + + y_stride = xd->plane[0].pre[0].stride; + + rr = ref_mv->as_mv.row; + rc = ref_mv->as_mv.col; + br = bestmv->as_mv.row << 3; + bc = bestmv->as_mv.col << 3; + hstep = 4; + minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - + ((1 << MV_MAX_BITS) - 1)); + maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + + ((1 << MV_MAX_BITS) - 1)); + minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - + ((1 << MV_MAX_BITS) - 1)); + maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + + ((1 << MV_MAX_BITS) - 1)); + + tr = br; + tc = bc; + + + offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; + + // central mv + bestmv->as_mv.row <<= 3; + bestmv->as_mv.col <<= 3; + + // calculate central point error + // TODO(yunqingwang): central pointer error was already calculated in full- + // pixel search, and can be passed in this function. + comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + + // Each subsequent iteration checks at least one point in + // common with the last iteration could be 2 ( if diag selected) + // 1/2 pel + FIRST_LEVEL_CHECKS; + if (halfiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + + // Each subsequent iteration checks at least one point in common with + // the last iteration could be 2 ( if diag selected) 1/4 pel + + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (quarteriters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + forced_stop == 0) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (eighthiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + bestmv->as_mv.row = br; + bestmv->as_mv.col = bc; + + if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) + return INT_MAX; + + return besterr; +} + #undef MVC #undef PRE #undef DIST diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index b91e6fd346e1a8fdd51a00132ede24870af65891..35ef483c6c87567abc205ee8879b4763d91677e4 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -80,6 +80,21 @@ typedef int (fractional_mv_step_fp) ( int *distortion, unsigned int *sse); extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative; +extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; + +typedef int (fractional_mv_step_comp_fp) ( + MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, // 0 - full, 1 - qtr only, 2 - half only + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, unsigned int *sse1, + const uint8_t *second_pred, + int w, int h); +extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_iterative; +extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_tree; typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x, int_mv *ref_mv, int sad_per_bit, @@ -102,18 +117,6 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, int *mvjcost, int *mvcost[2], int_mv *center_mv); -int vp9_find_best_sub_pixel_comp_iterative( - MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int forced_stop, // 0 - full, 1 - qtr only, 2 - half only - int iters_per_step, - int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, - int w, int h); - int vp9_refining_search_8p_c(MACROBLOCK *x, int_mv *ref_mv, int error_per_bit, int search_range, vp9_variance_fn_ptr_t *fn_ptr, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 2bfee63d6524b9a97b978e0e1d1c34df8e5ede26..914476c6e9b4b7a390b6fadac7c65da731758f1e 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -709,8 +709,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->search_method = NSTEP; sf->auto_filter = 1; sf->recode_loop = 1; - sf->subpel_search_method = SUBPEL_ITERATIVE; - sf->subpel_iters_per_step = 3; + sf->subpel_search_method = SUBPEL_TREE; + sf->subpel_iters_per_step = 2; sf->optimize_coefficients = !cpi->oxcf.lossless; sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; @@ -829,7 +829,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; sf->auto_mv_step_size = 1; sf->search_method = SQUARE; - sf->subpel_iters_per_step = 2; + sf->subpel_iters_per_step = 1; } if (speed == 3) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; @@ -923,9 +923,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) { if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative; - } else { - // TODO(debargha): Other methods to come - assert(0); + cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_iterative; + } else if (cpi->sf.subpel_search_method == SUBPEL_TREE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; + cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; } cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index fdc1081355731d29b22b39b3c5b1988dfe697345..083ff71e5e7b19446d60beb03dbb0fb2345aae51 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -234,6 +234,7 @@ typedef enum { typedef enum { SUBPEL_ITERATIVE = 0, + SUBPEL_TREE = 1, // Other methods to come } SUBPEL_SEARCH_METHODS; @@ -534,6 +535,7 @@ typedef struct VP9_COMP { unsigned int active_map_enabled; fractional_mv_step_fp *find_fractional_mv_step; + fractional_mv_step_comp_fp *find_fractional_mv_step_comp; vp9_full_search_fn_t full_search_sad; vp9_refining_search_fn_t refining_search_sad; vp9_diamond_search_fn_t diamond_search_sad; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 3cc526d03eca3848dfb384faee5c13ae69a93f4e..9e4ce711b593a72a8ac151a7a18d4a8e36c8674f 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2663,7 +2663,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - bestsme = vp9_find_best_sub_pixel_comp_iterative( + bestsme = cpi->find_fractional_mv_step_comp( x, &tmp_mv, &ref_mv[id], x->errorperbit,