diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 67b809f8dd5991136f06c4f7ee715f049fbf607f..314e195d251ee8cfb000ed38599e2e3447eed753 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -59,7 +59,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, x, dst_mv, ref_mv, x->errorperbit, &v_fn_ptr, - NULL, NULL, + 0, cpi->sf.subpel_iters_per_step, NULL, NULL, & distortion, &sse); } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 77d6554b0f84cf7b6f2efb348f708318f9f56244..014f54a49d5cd05144c03ea12323e52a2acedaa4 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -245,13 +245,15 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { }, \ v = INT_MAX;) -int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1) { +int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1) { uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; MACROBLOCKD *xd = &x->e_mbd; @@ -262,9 +264,9 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; - unsigned int halfiters = 4; - unsigned int quarteriters = 4; - unsigned int eighthiters = 4; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; int thismse; int maxc, minc, maxr, minr; int y_stride; @@ -289,7 +291,6 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, tr = br; tc = bc; - offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; // central mv @@ -303,7 +304,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, // TODO: Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) - while (--halfiters) { + while (halfiters--) { // 1/2 pel CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); @@ -337,41 +338,46 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, // TODO: Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel - hstep >>= 1; - while (--quarteriters) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + while (quarteriters--) { + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(down, tr + hstep, tc); - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - // no reason to check the same one again. - if (tr == br && tc == bc) - break; + switch (whichdir) { + case 0: + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } - tr = br; - tc = bc; + // no reason to check the same one again. + if (tr == br && tc == bc) + break; + + tr = br; + tc = bc; + } } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + forced_stop == 0) { hstep >>= 1; - while (--eighthiters) { + while (eighthiters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); @@ -402,6 +408,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, tc = bc; } } + bestmv->as_mv.row = br; bestmv->as_mv.col = bc; @@ -418,14 +425,17 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \ z, src_stride, &sse, second_pred) -int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1, - const uint8_t *second_pred, int w, int h) { +int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + const uint8_t *second_pred, + int w, int h) { uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; MACROBLOCKD *xd = &x->e_mbd; @@ -436,9 +446,9 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; - unsigned int halfiters = 4; - unsigned int quarteriters = 4; - unsigned int eighthiters = 4; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; int thismse; int maxc, minc, maxr, minr; int y_stride; @@ -485,7 +495,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, // Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) - while (--halfiters) { + while (halfiters--) { // 1/2 pel CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); @@ -519,41 +529,46 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, // Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel - hstep >>= 1; - while (--quarteriters) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + while (quarteriters--) { + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(down, tr + hstep, tc); - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - // no reason to check the same one again. - if (tr == br && tc == bc) - break; + switch (whichdir) { + case 0: + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } - tr = br; - tc = bc; + // no reason to check the same one again. + if (tr == br && tc == bc) + break; + + tr = br; + tc = bc; + } } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + forced_stop == 0) { hstep >>= 1; - while (--eighthiters) { + while (eighthiters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); @@ -594,7 +609,6 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, return besterr; } - #undef MVC #undef PRE #undef DIST @@ -603,627 +617,8 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, #undef MIN #undef MAX -int vp9_find_best_sub_pixel_step(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1) { - int bestmse = INT_MAX; - int_mv startmv; - int_mv this_mv; - int_mv orig_mv; - int yrow_movedback = 0, ycol_movedback = 0; - uint8_t *z = x->plane[0].src.buf; - int src_stride = x->plane[0].src.stride; - int left, right, up, down, diag; - unsigned int sse; - int whichdir; - int thismse; - int y_stride; - MACROBLOCKD *xd = &x->e_mbd; - - uint8_t *y = xd->plane[0].pre[0].buf + - (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + - bestmv->as_mv.col; - y_stride = xd->plane[0].pre[0].stride; - - // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - orig_mv = *bestmv; - - // calculate central point error - bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 8; - thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 8; - thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - // for(whichdir =0;whichdir<4;whichdir++) - // { - this_mv = startmv; - - switch (whichdir) { - case 0: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride, - &sse); - break; - case 1: - this_mv.as_mv.col += 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride, - &sse); - break; - case 2: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse); - break; - case 3: - default: - this_mv.as_mv.col += 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - -// } - - - // time to check quarter pels. - if (bestmv->as_mv.row < startmv.as_mv.row) { - y -= y_stride; - yrow_movedback = 1; - } - - if (bestmv->as_mv.col < startmv.as_mv.col) { - y--; - ycol_movedback = 1; - } - - startmv = *bestmv; - - - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col = startmv.as_mv.col - 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - src_stride, &sse); - } - - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 4; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row = startmv.as_mv.row - 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), - z, src_stride, &sse); - } - - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 4; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) { - case 0: - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 2; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, - SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse); - } - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - y_stride - 1, y_stride, - SP(6), SP(6), z, src_stride, &sse); - } - } - - break; - case 1: - this_mv.as_mv.col += 2; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); - } - - break; - case 2: - this_mv.as_mv.row += 2; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - src_stride, &sse); - } - - break; - case 3: - this_mv.as_mv.col += 2; - this_mv.as_mv.row += 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - if (!(xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv))) - return bestmse; - - /* Now do 1/8th pixel */ - if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) { - y -= y_stride; - yrow_movedback = 1; - } - - if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) { - y--; - ycol_movedback = 1; - } - - startmv = *bestmv; - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col = startmv.as_mv.col - 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } - - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row = startmv.as_mv.row - 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); - } - - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) { - case 0: - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 1; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, - SP(7), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - y_stride - 1, y_stride, - SP(7), SP(7), z, src_stride, &sse); - } - } - - break; - case 1: - this_mv.as_mv.col += 1; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); - } - - break; - case 2: - this_mv.as_mv.row += 1; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, - SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse); - } - - break; - case 3: - this_mv.as_mv.col += 1; - this_mv.as_mv.row += 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - return bestmse; -} - #undef SP -int vp9_find_best_half_pixel_step(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1) { - int bestmse = INT_MAX; - int_mv startmv; - int_mv this_mv; - uint8_t *z = x->plane[0].src.buf; - int src_stride = x->plane[0].src.stride; - int left, right, up, down, diag; - unsigned int sse; - int whichdir; - int thismse; - int y_stride; - MACROBLOCKD *xd = &x->e_mbd; - - uint8_t *y = xd->plane[0].pre[0].buf + - (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col; - y_stride = xd->plane[0].pre[0].stride; - - // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - - // calculate central point error - bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 8; - thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 8; - thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - // now check 1 more diagonal - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - this_mv = startmv; - - switch (whichdir) { - case 0: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, - z, src_stride, &sse); - break; - case 1: - this_mv.as_mv.col += 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, - z, src_stride, &sse); - break; - case 2: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse); - break; - case 3: - default: - this_mv.as_mv.col += 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - return bestmse; -} - #define CHECK_BOUNDS(range) \ {\ all_in = 1;\ diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 5d0c0e8ba08c0649a077c34047a018b378c8a8c5..b91e6fd346e1a8fdd51a00132ede24870af65891 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -67,12 +67,19 @@ int vp9_square_search(MACROBLOCK *x, int_mv *center_mv, int_mv *best_mv); -typedef int (fractional_mv_step_fp) (MACROBLOCK *x, int_mv - *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse); -extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively; -extern fractional_mv_step_fp vp9_find_best_sub_pixel_step; -extern fractional_mv_step_fp vp9_find_best_half_pixel_step; +typedef int (fractional_mv_step_fp) ( + MACROBLOCK *x, + int_mv *bestmv, + int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, // 0 - full, 1 - qtr only, 2 - half only + int iters_per_step, + int *mvjcost, + int *mvcost[2], + int *distortion, + unsigned int *sse); +extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative; typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x, int_mv *ref_mv, int sad_per_bit, @@ -95,14 +102,17 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, int *mvjcost, int *mvcost[2], int_mv *center_mv); -int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, - int w, int h); +int vp9_find_best_sub_pixel_comp_iterative( + MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, // 0 - full, 1 - qtr only, 2 - half only + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, unsigned int *sse1, + const uint8_t *second_pred, + int w, int h); int vp9_refining_search_8p_c(MACROBLOCK *x, int_mv *ref_mv, int error_per_bit, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index d1db91086d389291ac1502afc36e80dcfb8ebc6e..cf5ae5252d10bbe5be29bb93f6b97f9e031fb2a9 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -713,9 +713,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->search_method = NSTEP; sf->auto_filter = 1; sf->recode_loop = 1; - sf->quarter_pixel_search = 1; - sf->half_pixel_search = 1; - sf->iterative_sub_pixel = 1; + sf->subpel_search_method = SUBPEL_ITERATIVE; + sf->subpel_iters_per_step = 3; sf->optimize_coefficients = !cpi->oxcf.lossless; sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; @@ -831,6 +830,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; sf->auto_mv_step_size = 1; sf->search_method = SQUARE; + sf->subpel_iters_per_step = 2; } if (speed == 3) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; @@ -851,6 +851,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_splitmv = 1; sf->auto_mv_step_size = 1; sf->search_method = BIGDIA; + sf->subpel_iters_per_step = 1; } if (speed == 4) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; @@ -875,6 +876,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_splitmv = 1; sf->search_method = HEX; + sf->subpel_iters_per_step = 1; } /* if (speed == 2) { @@ -918,12 +920,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; - if (cpi->sf.iterative_sub_pixel == 1) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step_iteratively; - } else if (cpi->sf.quarter_pixel_search) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step; - } else if (cpi->sf.half_pixel_search) { - cpi->find_fractional_mv_step = vp9_find_best_half_pixel_step; + if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative; + } else { + // TODO(debargha): Other methods to come + assert(0); } cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7eae7007fe7836e4f1d7cf8e2cee6d6857af1713..12491076987bbc5ce6fdf1dd20df0fa326222509 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -232,14 +232,18 @@ typedef enum { FLAG_SKIP_INTRA_LOWVAR = 32, } MODE_SEARCH_SKIP_LOGIC; +typedef enum { + SUBPEL_ITERATIVE = 0, + // Other methods to come +} SUBPEL_SEARCH_METHODS; + typedef struct { int RD; SEARCH_METHODS search_method; int auto_filter; int recode_loop; - int iterative_sub_pixel; - int half_pixel_search; - int quarter_pixel_search; + SUBPEL_SEARCH_METHODS subpel_search_method; + int subpel_iters_per_step; int thresh_mult[MAX_MODES]; int max_step_search_steps; int reduce_first_step_size; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 238c9815bcf3195e70abce07f6c68fdf6bfc6305..90d35f86a9e109079ab5f4166cd306d22ad6d4e0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1967,6 +1967,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, unsigned int sse; cpi->find_fractional_mv_step(x, &mode_mv[NEWMV], bsi->ref_mv, x->errorperbit, v_fn_ptr, + 0, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &distortion, &sse); @@ -2547,6 +2548,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, x->errorperbit, &cpi->fn_ptr[block_size], + 0, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, &sse); } @@ -2673,13 +2675,15 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, - &ref_mv[id], - x->errorperbit, - &cpi->fn_ptr[block_size], - x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, - pw, ph); + bestsme = vp9_find_best_sub_pixel_comp_iterative( + x, &tmp_mv, + &ref_mv[id], + x->errorperbit, + &cpi->fn_ptr[block_size], + 0, cpi->sf.subpel_iters_per_step, + x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, + pw, ph); } if (id) diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 11d601bb36910120de8bc1004300bdb80f30c100..3052e8f70f07281cf58afa383344eadb881a3fad 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -170,6 +170,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], + 0, cpi->sf.subpel_iters_per_step, NULL, NULL, &distortion, &sse); }