From a76caa7ff447dac52d9c705d5791b2314882f13a Mon Sep 17 00:00:00 2001 From: Paul Wilkins <paulwilkins@google.com> Date: Thu, 19 Sep 2013 18:20:18 +0100 Subject: [PATCH] Alter Speed 3. This patch deletes the variance based speed three partitioning. Speed 3 now uses the same partitioning method as speed 2 but with some stricter conditions. The speed and quality are now somewhere between speeds 2 and 4 whereas before it was worse in both than speed 4. Change-Id: Ia142e7007299d79db3ceee6ca8670540db6f7a41 --- vp9/encoder/vp9_encodeframe.c | 323 +--------------------------------- vp9/encoder/vp9_onyx_if.c | 17 +- vp9/encoder/vp9_onyx_int.h | 1 - 3 files changed, 14 insertions(+), 327 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f6045e80b4..a2adcd386f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -951,323 +951,6 @@ static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8, } } -static void set_block_size(VP9_COMMON * const cm, MODE_INFO **mi_8x8, - BLOCK_SIZE bsize, int mis, int mi_row, - int mi_col) { - int r, c; - const int bs = MAX(num_8x8_blocks_wide_lookup[bsize], - num_8x8_blocks_high_lookup[bsize]); - const int idx_str = mis * mi_row + mi_col; - MODE_INFO **const mi2 = &mi_8x8[idx_str]; - - mi2[0] = cm->mi + idx_str; - mi2[0]->mbmi.sb_type = bsize; - - for (r = 0; r < bs; r++) - for (c = 0; c < bs; c++) - if (mi_row + r < cm->mi_rows && mi_col + c < cm->mi_cols) - mi2[r * mis + c] = mi2[0]; -} - -typedef struct { - int64_t sum_square_error; - int64_t sum_error; - int count; - int variance; -} var; - -typedef struct { - var none; - var horz[2]; - var vert[2]; -} partition_variance; - -#define VT(TYPE, BLOCKSIZE) \ - typedef struct { \ - partition_variance vt; \ - BLOCKSIZE split[4]; } TYPE; - -VT(v8x8, var) -VT(v16x16, v8x8) -VT(v32x32, v16x16) -VT(v64x64, v32x32) - -typedef struct { - partition_variance *vt; - var *split[4]; -} vt_node; - -typedef enum { - V16X16, - V32X32, - V64X64, -} TREE_LEVEL; - -static void tree_to_node(void *data, BLOCK_SIZE bsize, vt_node *node) { - int i; - switch (bsize) { - case BLOCK_64X64: { - v64x64 *vt = (v64x64 *) data; - node->vt = &vt->vt; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].vt.none; - break; - } - case BLOCK_32X32: { - v32x32 *vt = (v32x32 *) data; - node->vt = &vt->vt; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].vt.none; - break; - } - case BLOCK_16X16: { - v16x16 *vt = (v16x16 *) data; - node->vt = &vt->vt; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].vt.none; - break; - } - case BLOCK_8X8: { - v8x8 *vt = (v8x8 *) data; - node->vt = &vt->vt; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i]; - break; - } - default: - node->vt = 0; - for (i = 0; i < 4; i++) - node->split[i] = 0; - assert(-1); - } -} - -// Set variance values given sum square error, sum error, count. -static void fill_variance(var *v, int64_t s2, int64_t s, int c) { - v->sum_square_error = s2; - v->sum_error = s; - v->count = c; - if (c > 0) - v->variance = (int)(256 - * (v->sum_square_error - v->sum_error * v->sum_error / v->count) - / v->count); - else - v->variance = 0; -} - -// Combine 2 variance structures by summing the sum_error, sum_square_error, -// and counts and then calculating the new variance. -void sum_2_variances(var *r, var *a, var*b) { - fill_variance(r, a->sum_square_error + b->sum_square_error, - a->sum_error + b->sum_error, a->count + b->count); -} - -static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { - vt_node node; - tree_to_node(data, bsize, &node); - sum_2_variances(&node.vt->horz[0], node.split[0], node.split[1]); - sum_2_variances(&node.vt->horz[1], node.split[2], node.split[3]); - sum_2_variances(&node.vt->vert[0], node.split[0], node.split[2]); - sum_2_variances(&node.vt->vert[1], node.split[1], node.split[3]); - sum_2_variances(&node.vt->none, &node.vt->vert[0], &node.vt->vert[1]); -} - -#if PERFORM_RANDOM_PARTITIONING -static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m, - BLOCK_SIZE block_size, int mi_row, - int mi_col, int mi_size) { - VP9_COMMON * const cm = &cpi->common; - vt_node vt; - const int mis = cm->mode_info_stride; - int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex; - - tree_to_node(data, block_size, &vt); - - // split none is available only if we have more than half a block size - // in width and height inside the visible image - if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows && - (rand() & 3) < 1) { - set_block_size(cm, m, block_size, mis, mi_row, mi_col); - return 1; - } - - // vertical split is available on all but the bottom border - if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold - && (rand() & 3) < 1) { - set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row, - mi_col); - return 1; - } - - // horizontal split is available on all but the right border - if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold - && (rand() & 3) < 1) { - set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row, - mi_col); - return 1; - } - - return 0; -} - -#else // !PERFORM_RANDOM_PARTITIONING - -static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m, - BLOCK_SIZE bsize, int mi_row, - int mi_col, int mi_size) { - VP9_COMMON * const cm = &cpi->common; - vt_node vt; - const int mis = cm->mode_info_stride; - int64_t threshold = 50 * cpi->common.base_qindex; - - tree_to_node(data, bsize, &vt); - - // split none is available only if we have more than half a block size - // in width and height inside the visible image - if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows - && vt.vt->none.variance < threshold) { - set_block_size(cm, m, bsize, mis, mi_row, mi_col); - return 1; - } - - // vertical split is available on all but the bottom border - if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold - && vt.vt->vert[1].variance < threshold) { - set_block_size(cm, m, get_subsize(bsize, PARTITION_VERT), mis, mi_row, - mi_col); - return 1; - } - - // horizontal split is available on all but the right border - if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold - && vt.vt->horz[1].variance < threshold) { - set_block_size(cm, m, get_subsize(bsize, PARTITION_HORZ), mis, mi_row, - mi_col); - return 1; - } - - return 0; -} -#endif // PERFORM_RANDOM_PARTITIONING - -static void choose_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8, - int mi_row, int mi_col) { - VP9_COMMON * const cm = &cpi->common; - MACROBLOCK *x = &cpi->mb; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - const int mis = cm->mode_info_stride; - // TODO(JBB): More experimentation or testing of this threshold; - int64_t threshold = 4; - int i, j, k; - v64x64 vt; - unsigned char * s; - int sp; - const unsigned char * d; - int dp; - int pixels_wide = 64, pixels_high = 64; - - vp9_zero(vt); - set_offsets(cpi, mi_row, mi_col, BLOCK_64X64); - - if (xd->mb_to_right_edge < 0) - pixels_wide += (xd->mb_to_right_edge >> 3); - - if (xd->mb_to_bottom_edge < 0) - pixels_high += (xd->mb_to_bottom_edge >> 3); - - s = x->plane[0].src.buf; - sp = x->plane[0].src.stride; - - // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want - // but this needs more experimentation. - threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex; - - d = vp9_64x64_zeros; - dp = 64; - if (cm->frame_type != KEY_FRAME) { - int_mv nearest_mv, near_mv; - const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)]; - YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx]; - YV12_BUFFER_CONFIG *second_ref_fb = NULL; - - setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col, - &xd->scale_factor[0]); - setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col, - &xd->scale_factor[1]); - - xd->this_mi->mbmi.ref_frame[0] = LAST_FRAME; - xd->this_mi->mbmi.sb_type = BLOCK_64X64; - vp9_find_best_ref_mvs(xd, - mi_8x8[0]->mbmi.ref_mvs[mi_8x8[0]->mbmi.ref_frame[0]], - &nearest_mv, &near_mv); - - xd->this_mi->mbmi.mv[0] = nearest_mv; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64); - - d = xd->plane[0].dst.buf; - dp = xd->plane[0].dst.stride; - } - - // Fill in the entire tree of 8x8 variances for splits. - for (i = 0; i < 4; i++) { - const int x32_idx = ((i & 1) << 5); - const int y32_idx = ((i >> 1) << 5); - for (j = 0; j < 4; j++) { - const int x16_idx = x32_idx + ((j & 1) << 4); - const int y16_idx = y32_idx + ((j >> 1) << 4); - v16x16 *vst = &vt.split[i].split[j]; - for (k = 0; k < 4; k++) { - int x_idx = x16_idx + ((k & 1) << 3); - int y_idx = y16_idx + ((k >> 1) << 3); - unsigned int sse = 0; - int sum = 0; - if (x_idx < pixels_wide && y_idx < pixels_high) - vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, - d + y_idx * dp + x_idx, dp, &sse, &sum); - fill_variance(&vst->split[k].vt.none, sse, sum, 64); - } - } - } - // Fill the rest of the variance tree by summing the split partition - // values. - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); - } - fill_variance_tree(&vt.split[i], BLOCK_32X32); - } - fill_variance_tree(&vt, BLOCK_64X64); - // Now go through the entire structure, splitting every block size until - // we get to one that's got a variance lower than our threshold, or we - // hit 8x8. - if (!set_vt_partitioning(cpi, &vt, mi_8x8, BLOCK_64X64, mi_row, mi_col, - 4)) { - for (i = 0; i < 4; ++i) { - const int x32_idx = ((i & 1) << 2); - const int y32_idx = ((i >> 1) << 2); - if (!set_vt_partitioning(cpi, &vt.split[i], mi_8x8, BLOCK_32X32, - (mi_row + y32_idx), (mi_col + x32_idx), 2)) { - for (j = 0; j < 4; ++j) { - const int x16_idx = ((j & 1) << 1); - const int y16_idx = ((j >> 1) << 1); - if (!set_vt_partitioning(cpi, &vt.split[i].split[j], mi_8x8, - BLOCK_16X16, - (mi_row + y32_idx + y16_idx), - (mi_col + x32_idx + x16_idx), 1)) { - for (k = 0; k < 4; ++k) { - const int x8_idx = (k & 1); - const int y8_idx = (k >> 1); - set_block_size(cm, mi_8x8, BLOCK_8X8, mis, - (mi_row + y32_idx + y16_idx + y8_idx), - (mi_col + x32_idx + x16_idx + x8_idx)); - } - } - } - } - } - } -} - static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, int64_t *dist, @@ -2060,7 +1743,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, if (cpi->sf.reference_masking) rd_pick_reference_frame(cpi, mi_row, mi_col); - if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning || + if (cpi->sf.use_lastframe_partitioning || cpi->sf.use_one_partition_size_always ) { const int idx_str = cm->mode_info_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; @@ -2072,10 +1755,6 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, set_partitioning(cpi, mi_8x8, mi_row, mi_col); rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_by_variance) { - choose_partitioning(cpi, cm->mi_grid_visible, mi_row, mi_col); - rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1); } else { if ((cpi->common.current_video_frame % cpi->sf.last_partitioning_redo_frequency) == 0 diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index d815dc96a5..16e83f1931 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -703,7 +703,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->adaptive_motion_search = 0; sf->use_avoid_tested_higherror = 0; sf->reference_masking = 0; - sf->partition_by_variance = 0; sf->use_one_partition_size_always = 0; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; @@ -826,8 +825,12 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_skip_start = 6; } if (speed == 3) { + sf->less_rectangular_check = 1; + sf->use_square_partition_only = 1; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->partition_by_variance = 1; + sf->use_lastframe_partitioning = 1; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || cpi->common.intra_only || cpi->common.show_frame == 0) ? @@ -839,17 +842,23 @@ void vp9_set_speed_features(VP9_COMP *cpi) { FLAG_SKIP_COMP_REFMISMATCH | FLAG_SKIP_INTRA_LOWVAR | FLAG_EARLY_TERMINATE; + sf->intra_y_mode_mask = INTRA_DC_ONLY; + sf->intra_uv_mode_mask = INTRA_DC_ONLY; + sf->use_uv_intra_rd_estimate = 1; sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; sf->use_lp32x32fdct = 1; + sf->adaptive_motion_search = 1; + sf->using_small_partition_info = 0; sf->disable_splitmv = 1; sf->auto_mv_step_size = 1; sf->search_method = BIGDIA; sf->subpel_iters_per_step = 1; + sf->use_fast_lpf_pick = 1; + sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_interval = 2; sf->disable_split_var_thresh = 64; sf->disable_filter_search_var_thresh = 64; - sf->intra_y_mode_mask = INTRA_DC_ONLY; - sf->intra_uv_mode_mask = INTRA_DC_ONLY; sf->use_fast_coef_updates = 2; sf->mode_skip_start = 6; } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index a106014f85..1002ff756d 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -250,7 +250,6 @@ typedef struct { TX_SIZE_SEARCH_METHOD tx_size_search_method; int use_lp32x32fdct; int use_avoid_tested_higherror; - int partition_by_variance; int use_one_partition_size_always; int less_rectangular_check; int use_square_partition_only; -- GitLab