From 1374a06bd87e2e2c41e17738bf5215d53e5223a0 Mon Sep 17 00:00:00 2001 From: Yaowu Xu <yaowu@google.com> Date: Thu, 27 Jun 2013 12:07:07 -0700 Subject: [PATCH] Optimize partition search order This commit changes the partition search order so that rectangular partitions are checked after square partitions. It also adds a speed feature that skips the rectangular partition check when NONE is better than SPLIT in the RD sense. This feature speeds up the encoder by roughly 1.5X, with a compression loss of -0.91% on cif set -0.56% on stdhd set Change-Id: I0d2d06993041aa9ea9073fcc39c54f73a127dfa4 --- vp9/encoder/vp9_encodeframe.c | 133 +++++++++++++++++----------------- vp9/encoder/vp9_onyx_if.c | 13 +++- vp9/encoder/vp9_onyx_int.h | 1 + 3 files changed, 78 insertions(+), 69 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 168b460127..33e532dfad 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1351,71 +1351,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, if (!cpi->sf.use_partitions_less_than || (cpi->sf.use_partitions_less_than && bsize <= cpi->sf.less_than_block_size)) { - // PARTITION_HORZ - if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) { - int r2, r = 0; - int64_t d2, d = 0; - subsize = get_subsize(bsize, PARTITION_HORZ); - *(get_sb_index(xd, subsize)) = 0; - pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, - get_block_context(x, subsize)); - - if (mi_row + (ms >> 1) < cm->mi_rows) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); - - *(get_sb_index(xd, subsize)) = 1; - pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize, - get_block_context(x, subsize)); - r2 += r; - d2 += d; - } - set_partition_seg_context(cm, xd, mi_row, mi_col); - pl = partition_plane_context(xd, bsize); - if (r2 < INT_MAX) - r2 += x->partition_cost[pl][PARTITION_HORZ]; - if 
(RDCOST(x->rdmult, x->rddiv, r2, d2) - < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { - srate = r2; - sdist = d2; - *(get_sb_partitioning(x, bsize)) = subsize; - } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - } - - // PARTITION_VERT - if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) { - int r2; - int64_t d2; - subsize = get_subsize(bsize, PARTITION_VERT); - *(get_sb_index(xd, subsize)) = 0; - pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, - get_block_context(x, subsize)); - if (mi_col + (ms >> 1) < cm->mi_cols) { - int r = 0; - int64_t d = 0; - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); - - *(get_sb_index(xd, subsize)) = 1; - pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize, - get_block_context(x, subsize)); - r2 += r; - d2 += d; - } - set_partition_seg_context(cm, xd, mi_row, mi_col); - pl = partition_plane_context(xd, bsize); - if (r2 < INT_MAX) - r2 += x->partition_cost[pl][PARTITION_VERT]; - if (RDCOST(x->rdmult, x->rddiv, r2, d2) - < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { - srate = r2; - sdist = d2; - *(get_sb_partitioning(x, bsize)) = subsize; - } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - } - + int larger_is_better = 0; // PARTITION_NONE if ((mi_row + (ms >> 1) < cm->mi_rows) && (mi_col + (ms >> 1) < cm->mi_cols)) { @@ -1433,10 +1369,77 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { srate = r; sdist = d; + larger_is_better = 1; if (bsize >= BLOCK_SIZE_SB8X8) *(get_sb_partitioning(x, bsize)) = bsize; } } + if (!cpi->sf.less_rectangular_check || !larger_is_better) { + // PARTITION_HORZ + if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) { + int r2, r = 0; + int64_t d2, d = 0; + subsize = get_subsize(bsize, PARTITION_HORZ); + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, 
&d2, subsize, + get_block_context(x, subsize)); + + if (mi_row + (ms >> 1) < cm->mi_rows) { + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize, + get_block_context(x, subsize)); + r2 += r; + d2 += d; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + if (r2 < INT_MAX) + r2 += x->partition_cost[pl][PARTITION_HORZ]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) + < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + srate = r2; + sdist = d2; + *(get_sb_partitioning(x, bsize)) = subsize; + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } + + // PARTITION_VERT + if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) { + int r2; + int64_t d2; + subsize = get_subsize(bsize, PARTITION_VERT); + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, + get_block_context(x, subsize)); + if (mi_col + (ms >> 1) < cm->mi_cols) { + int r = 0; + int64_t d = 0; + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize, + get_block_context(x, subsize)); + r2 += r; + d2 += d; + } + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + if (r2 < INT_MAX) + r2 += x->partition_cost[pl][PARTITION_VERT]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) + < RDCOST(x->rdmult, x->rddiv, srate, sdist)) { + srate = r2; + sdist = d2; + *(get_sb_partitioning(x, bsize)) = subsize; + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } + } } *rate = srate; *dist = sdist; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 3b09b9f11f..b7635ce77f 100644 --- 
a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -659,7 +659,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { int mode = cpi->compressor_speed; int speed = cpi->speed; int i; - // Only modes 0 and 1 supported for now in experimental code basae if (mode > 1) mode = 1; @@ -699,6 +698,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->adjust_thresholds_by_speed = 0; sf->partition_by_variance = 0; sf->use_one_partition_size_always = 0; + sf->less_rectangular_check = 0; sf->use_partitions_less_than = 0; sf->less_than_block_size = BLOCK_SIZE_MB16X16; sf->use_partitions_greater_than = 0; @@ -730,12 +730,17 @@ void vp9_set_speed_features(VP9_COMP *cpi) { if (speed == 1) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; - sf->use_largest_txform = !(cpi->common.frame_type == KEY_FRAME || - cpi->common.intra_only || - cpi->common.show_frame == 0); + sf->less_rectangular_check = 1; + sf->use_largest_txform = !(cpi->common.frame_type == KEY_FRAME || + cpi->common.intra_only || + cpi->common.show_frame == 0); } if (speed == 2) { + sf->use_largest_txform = !(cpi->common.frame_type == KEY_FRAME || + cpi->common.intra_only || + cpi->common.show_frame == 0); sf->adjust_thresholds_by_speed = 1; + sf->less_rectangular_check = 1; sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; sf->reduce_first_step_size = 1; sf->optimize_coefficients = 0; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 63b0155491..a1f567aedb 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -226,6 +226,7 @@ typedef struct { int adjust_thresholds_by_speed; int partition_by_variance; int use_one_partition_size_always; + int less_rectangular_check; BLOCK_SIZE_TYPE always_this_block_size; int use_partitions_greater_than; BLOCK_SIZE_TYPE greater_than_block_size; -- GitLab