diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 001ac69bdd85422a41721e4ade51015a914504cd..e90b8dd090527791932db6b7a97992ba8849dc90 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3364,7 +3364,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); + if (!cpi->sf.reuse_inter_pred_sby) + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); + + vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); if (!x->skip) { mbmi->skip = 1; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index f3b2d2fa241296b2ad7526c346bf6aa50474ab99..b621da35f5f1a3c1d3022c9e56a1f2c5701d9644 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -23,6 +23,7 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/encoder/vp9_encoder.h" +#include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" @@ -183,6 +184,22 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, *out_dist_sum += dist << 4; } +static int get_pred_buffer(PRED_BUFFER *p, int len) { + int i; + + for (i = 0; i < len; i++) { + if (!p[i].in_use) { + p[i].in_use = 1; + return i; + } + } + return -1; +} + +static void free_pred_buffer(PRED_BUFFER *p) { + p->in_use = 0; +} + // TODO(jingning) placeholder for inter-frame non-RD mode decision. // this needs various further optimizations. to be continued.. int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -229,6 +246,31 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int pred_filter_search = (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm)) % 2; + // For speed 6, the result of interp filter is reused later in actual encoding + // process. 
+ int bh = num_4x4_blocks_high_lookup[bsize] << 2; + int bw = num_4x4_blocks_wide_lookup[bsize] << 2; + int pixels_in_block = bh * bw; + // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. + PRED_BUFFER tmp[4]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64); + struct buf_2d orig_dst = pd->dst; + PRED_BUFFER *best_pred = NULL; + PRED_BUFFER *this_mode_pred = NULL; + int i; + + if (cpi->sf.reuse_inter_pred_sby) { + for (i = 0; i < 3; i++) { + tmp[i].data = &pred_buf[pixels_in_block * i]; + tmp[i].stride = bw; + tmp[i].in_use = 0; + } + + tmp[3].data = pd->dst.buf; + tmp[3].stride = pd->dst.stride; + tmp[3].in_use = 0; + } + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; @@ -324,6 +366,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for the best prediction filter type, when the resulting // motion vector is at sub-pixel accuracy level for luma component, i.e., // the last three bits are all zeros. 
+ if (cpi->sf.reuse_inter_pred_sby) { + if (this_mode == NEARESTMV) { + this_mode_pred = &tmp[3]; + } else { + this_mode_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = this_mode_pred->data; + pd->dst.stride = bw; + } + } + if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search && ((mbmi->mv[0].as_mv.row & 0x07) != 0 || @@ -334,6 +386,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, unsigned int pf_sse[3]; int64_t best_cost = INT64_MAX; INTERP_FILTER best_filter = SWITCHABLE, filter; + PRED_BUFFER *current_pred = this_mode_pred; for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) { int64_t cost; @@ -345,12 +398,28 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_switchable_rate(cpi) + pf_rate[filter], pf_dist[filter]); if (cost < best_cost) { - best_filter = filter; - best_cost = cost; - skip_txfm = x->skip_txfm; + best_filter = filter; + best_cost = cost; + skip_txfm = x->skip_txfm; + + if (cpi->sf.reuse_inter_pred_sby) { + if (this_mode_pred != current_pred) { + free_pred_buffer(this_mode_pred); + this_mode_pred = current_pred; + } + + if (filter < EIGHTTAP_SHARP) { + current_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = current_pred->data; + pd->dst.stride = bw; + } + } } } + if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred) + free_pred_buffer(current_pred); + mbmi->interp_filter = best_filter; rate = pf_rate[mbmi->interp_filter]; dist = pf_dist[mbmi->interp_filter]; @@ -451,6 +520,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; skip_txfm = x->skip_txfm; + + if (cpi->sf.reuse_inter_pred_sby) { + if (best_pred != NULL) + free_pred_buffer(best_pred); + + best_pred = this_mode_pred; + } + } else { + if (cpi->sf.reuse_inter_pred_sby) + free_pred_buffer(this_mode_pred); } if (x->skip) @@ -458,6 +537,19 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } + // If best prediction is 
not in dst buf, then copy the prediction block from + // temp buf to dst buf. best_pred stays NULL if no inter mode was picked. + if (cpi->sf.reuse_inter_pred_sby && best_pred != NULL && best_pred->data != orig_dst.buf) { + uint8_t *copy_from, *copy_to; + + pd->dst = orig_dst; + copy_to = pd->dst.buf; + + copy_from = best_pred->data; + + vp9_convolve_copy(copy_from, bw, copy_to, pd->dst.stride, NULL, 0, NULL, 0, + bw, bh); + } mbmi->mode = best_mode; mbmi->interp_filter = best_pred_filter; @@ -471,12 +563,21 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!x->skip && best_rd > inter_mode_thresh && bsize <= cpi->sf.max_intra_bsize) { for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { + if (cpi->sf.reuse_inter_pred_sby) { + pd->dst.buf = tmp[0].data; + pd->dst.stride = bw; + } + vp9_predict_intra_block(xd, 0, b_width_log2(bsize), mbmi->tx_size, this_mode, &p->src.buf[0], p->src.stride, &pd->dst.buf[0], pd->dst.stride, 0, 0, 0); model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); + + if (cpi->sf.reuse_inter_pred_sby) + pd->dst = orig_dst; + rate += cpi->mbmode_cost[this_mode]; rate += intra_cost_penalty; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); @@ -494,6 +595,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } } + #if CONFIG_DENOISING vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize); #endif diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h index a9c948d31a513d4b17ded200826ef294e0abf65c..3d89974fc363e97328d62167ce162ebc429f307e 100644 --- a/vp9/encoder/vp9_pickmode.h +++ b/vp9/encoder/vp9_pickmode.h @@ -17,6 +17,12 @@ extern "C" { #endif +typedef struct { + uint8_t *data; + int stride; + int in_use; +} PRED_BUFFER; + int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const struct TileInfo *const tile, int mi_row, int mi_col, diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 83d900d4296043b12f697176a296c9ea2af9942e..c38323ffcb65a864711062f008f6db70f00b0d5e 100644 --- 
a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -274,6 +274,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, // is checked for a partition block. Later, we can try to allow large // partitions to do intra mode checking. sf->max_intra_bsize = BLOCK_8X8; + + // This feature is only enabled when partition search is disabled. + sf->reuse_inter_pred_sby = 1; } if (speed >= 7) { @@ -339,6 +342,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL; sf->max_intra_bsize = BLOCK_64X64; + sf->reuse_inter_pred_sby = 0; // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. sf->always_this_block_size = BLOCK_16X16; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index c796421dbe08510050521120b00c6ac473438cb4..8750eafd8ce372fca2f9a8bd08ea9275996e48ff 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -353,6 +353,11 @@ typedef struct SPEED_FEATURES { // The threshold used in SOURCE_VAR_BASED_PARTITION search type. unsigned int source_var_thresh; + + // When partition is pre-set, the inter prediction result from pick_inter_mode + // can be reused in final block encoding process. It is enabled only for real- + // time mode speed 6. + int reuse_inter_pred_sby; } SPEED_FEATURES; struct VP9_COMP;