Commit bacc67f4 authored by jackychen's avatar jackychen

vp9: Skip some modes when variance is low for big blocks, for 1 pass real-time.

Skip intra modes and some inter modes (newmv, nearmv, nearestmv) for the
golden frame if the variance obtained from choose_partitioning is very low.
This applies only to 1-pass real-time CBR mode and bsize >= 32x32. It gives
a ~2.5% speed-up with less than 0.1% PSNR drop on the rtc test set. No
visual regression was observed.

Change-Id: I70efbc95a1007231ae36f02c5b2fbf6cd35077ad
parent 204809bf
......@@ -145,6 +145,11 @@ struct macroblock {
uint8_t sb_is_skin;
// Used to save the status of whether a block has a low variance in
// choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for
// 32x32.
uint8_t variance_low[9];
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
......
......@@ -747,6 +747,8 @@ static int choose_partitioning(VP9_COMP *cpi,
const uint8_t *d;
int sp;
int dp;
// Ref frame used in partitioning.
MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
int pixels_wide = 64, pixels_high = 64;
int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
......@@ -771,6 +773,10 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
for (i = 0; i < 9; i++) {
x->variance_low[i] = 0;
}
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
if (xd->mb_to_bottom_edge < 0)
......@@ -831,8 +837,10 @@ static int choose_partitioning(VP9_COMP *cpi,
mi->ref_frame[0] = GOLDEN_FRAME;
mi->mv[0].as_int = 0;
y_sad = y_sad_g;
ref_frame_partition = GOLDEN_FRAME;
} else {
x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
ref_frame_partition = LAST_FRAME;
}
set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
......@@ -1019,6 +1027,31 @@ static int choose_partitioning(VP9_COMP *cpi,
force_split[0] = 1;
}
if (cpi->sf.short_circuit_low_temp_var) {
// Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was
// selected.
if (ref_frame_partition == LAST_FRAME) {
// 64x64
if (vt.part_variances.none.variance < (thresholds[0] >> 1))
x->variance_low[0] = 1;
// 64x32
if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))
x->variance_low[1] = 1;
if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))
x->variance_low[2] = 1;
// 32x64
if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))
x->variance_low[3] = 1;
if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))
x->variance_low[4] = 1;
// 32x32
for (i = 0; i < 4; i++) {
if (vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))
x->variance_low[i + 5] = 1;
}
}
}
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold.
if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
......
......@@ -1126,34 +1126,38 @@ static INLINE void find_predictors(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col,
struct buf_2d yv12_mb[4][MAX_MB_PLANE],
BLOCK_SIZE bsize) {
BLOCK_SIZE bsize,
int force_skip_low_temp_var) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
TileInfo *const tile_info = &tile_data->tile_info;
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
x->pred_mv_sad[ref_frame] = INT_MAX;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
// this needs various further optimizations. to be continued..
// this needs various further optimizations. to be continued..
if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
if (cm->use_prev_frame_mvs)
if (cm->use_prev_frame_mvs) {
vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
candidates, mi_row, mi_col,
x->mbmi_ext->mode_context);
else
const_motion[ref_frame] =
mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
(int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
} else {
const_motion[ref_frame] =
mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
(int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
}
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&frame_mv[NEARESTMV][ref_frame],
&frame_mv[NEARMV][ref_frame]);
if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8) {
// Early exit for golden frame if force_skip_low_temp_var is set.
if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8 &&
!(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) {
vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
ref_frame, bsize);
}
......@@ -1266,6 +1270,39 @@ static void recheck_zeromv_after_denoising(
}
#endif // CONFIG_VP9_TEMPORAL_DENOISING
// Looks up the low-variance flag that applies to the block being coded.
//
// variance_low: 9-entry flag array; entry 0 is used for 64x64, entries 1-2
//               for 64x32, entries 3-4 for 32x64 and entries 5-8 for 32x32.
// mi_row/mi_col: block position; only the low three bits of each are
//               examined (presumably the offset inside the enclosing 64x64
//               superblock -- confirm against the caller).
// bsize:        block size; any size other than the four listed above
//               yields 0.
//
// Returns the selected flag value, or 0 when no entry applies.
static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,
                                              int mi_row, int mi_col,
                                              BLOCK_SIZE bsize) {
  // 1 when the corresponding coordinate has any of its low three bits set.
  const int col_offset = (mi_col & 0x7) != 0;
  const int row_offset = (mi_row & 0x7) != 0;

  switch (bsize) {
    case BLOCK_64X64:
      return variance_low[0];

    case BLOCK_64X32:
      // Entries 1 and 2 are distinguished by the row offset; a non-zero
      // column offset matches neither entry.
      if (col_offset) return 0;
      return variance_low[1 + row_offset];

    case BLOCK_32X64:
      // Entries 3 and 4 are distinguished by the column offset; a non-zero
      // row offset matches neither entry.
      if (row_offset) return 0;
      return variance_low[3 + col_offset];

    case BLOCK_32X32:
      // Entries 5..8: the column offset selects within a pair, the row
      // offset selects the pair.
      return variance_low[5 + 2 * row_offset + col_offset];

    default:
      return 0;
  }
}
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
......@@ -1324,6 +1361,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int svc_force_zero_mode[3] = {0};
int perform_intra_pred = 1;
int use_golden_nonzeromv = 1;
int force_skip_low_temp_var = 0;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
......@@ -1410,14 +1448,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
if (cpi->sf.short_circuit_low_temp_var) {
force_skip_low_temp_var =
set_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
}
if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
!svc_force_zero_mode[GOLDEN_FRAME - 1]))
!svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
use_golden_nonzeromv = 0;
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
&ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col,
yv12_mb, bsize);
yv12_mb, bsize, force_skip_low_temp_var);
}
for (idx = 0; idx < RT_INTER_MODES; ++idx) {
......@@ -1429,6 +1472,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int is_skippable;
int this_early_term = 0;
PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
if (cpi->use_svc)
this_mode = ref_mode_set_svc[idx].pred_mode;
......@@ -1447,17 +1491,27 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
continue;
if (const_motion[ref_frame] && this_mode == NEARMV)
continue;
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
// later.
if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
if (cpi->use_svc) {
if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
continue;
}
if (!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == LAST_FRAME)) {
if (!force_skip_low_temp_var &&
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == LAST_FRAME)) {
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
......@@ -1548,8 +1602,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
if (use_golden_nonzeromv &&
this_mode == NEWMV && ref_frame == LAST_FRAME &&
// If use_golden_nonzeromv is false, NEWMV mode is skipped for golden, no
// need to compute best_pred_sad which is only used to skip golden NEWMV.
if (use_golden_nonzeromv && this_mode == NEWMV &&
ref_frame == LAST_FRAME &&
frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
const int pre_stride = xd->plane[0].pre[0].stride;
const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
......@@ -1786,11 +1842,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
if (perform_intra_pred &&
((best_rdc.rdcost == INT64_MAX ||
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize)))) {
// threshold. Skip intra prediction if force_skip_low_temp_var is set.
if (!force_skip_low_temp_var && perform_intra_pred &&
(best_rdc.rdcost == INT64_MAX ||
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize))) {
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0, 0 };
int i;
TX_SIZE best_intra_tx_size = TX_SIZES;
......
......@@ -429,6 +429,11 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&
content != VP9E_CONTENT_SCREEN) {
// Enable short circuit when temporal variance is very low.
sf->short_circuit_low_temp_var = 1;
}
}
if (speed >= 7) {
......@@ -554,6 +559,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->default_interp_filter = SWITCHABLE;
sf->simple_model_rd_from_var = 0;
sf->short_circuit_flat_blocks = 0;
sf->short_circuit_low_temp_var = 0;
// Some speed-up features even for best quality as minimal impact on quality.
sf->adaptive_rd_thresh = 1;
......
......@@ -446,6 +446,10 @@ typedef struct SPEED_FEATURES {
// Skip a number of expensive mode evaluations for blocks with zero source
// variance.
int short_circuit_flat_blocks;
// Skip a number of expensive mode evaluations for blocks with very low
// temporal variance.
int short_circuit_low_temp_var;
} SPEED_FEATURES;
struct VP9_COMP;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment