Commit 9c2552a1 authored by Yunqing Wang

Record the sum of tx block eobs in the partition block

The sum of tx block eobs is needed in the machine learning based partition
early termination. The eobs are first accumulated during tx search, and
then the value associated with the best tx_size is copied to ctx for later
use.

Now that the sum of eobs is calculated correctly, re-enable the
ml_partition_search_early_termination speed feature.

Re-did the quality/speed test to check the impact of the fix.

1. Borg test BDRATE result:
4k set:     PSNR: +0.183%; SSIM: +0.100%;
hdres set:  PSNR: +0.168%; SSIM: +0.256%;
midres set: PSNR: +0.186%; SSIM: +0.326%;

2. Average speed gain result:
4k clips: 21%;
hd clips: 26%;
midres clips: 15%.

The result is in line with the original result.

Change-Id: I4209a95c89be03b4cbfb6a95b16885f89feddbda
parent 83ba1880
......@@ -128,6 +128,9 @@ struct macroblock {
// Set during mode selection. Read during block encoding.
uint8_t zcoeff_blk[TX_SIZES][256];
// Accumulate the tx block eobs in a partition block.
int32_t sum_y_eobs[TX_SIZES];
int skip;
int encode_breakout;
......
......@@ -73,7 +73,7 @@ typedef struct {
INTERP_FILTER pred_interp_filter;
// Used for the machine learning-based early termination
int sum_eobs;
int32_t sum_y_eobs;
} PICK_MODE_CONTEXT;
typedef struct PC_TREE {
......
......@@ -2711,18 +2711,6 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
}
#endif
// Accumulate all tx blocks' eobs results got from the partition evaluation.
// Adds the value at ctx->eobs_pbuf[plane][1][block] to ctx->sum_eobs on each
// call. `arg` is cast to PICK_MODE_CONTEXT *, so it must point to one. The
// function only adds to the running total; it never resets it, so the caller
// has to zero ctx->sum_eobs before starting a new accumulation.
static void accumulate_eobs(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
PICK_MODE_CONTEXT *ctx = (PICK_MODE_CONTEXT *)arg;
// These parameters are not used by this function; the casts to void silence
// unused-parameter warnings.
(void)row;
(void)col;
(void)plane_bsize;
(void)tx_size;
// NOTE(review): index 1 of eobs_pbuf is hard-coded here; which buffer it
// selects is not visible in this excerpt -- confirm against the
// PICK_MODE_CONTEXT definition.
ctx->sum_eobs += ctx->eobs_pbuf[plane][1][block];
}
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
......@@ -2899,6 +2887,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (this_rdc.rdcost < best_rdc.rdcost) {
MODE_INFO *mi = xd->mi[0];
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
......@@ -2917,7 +2907,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// Currently, the machine-learning based partition search early
// termination is only used while bsize is 16x16, 32x32 or 64x64,
// VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
if (ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
if (!x->e_mbd.lossless &&
!segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
const double *clf;
const double *mean;
const double *sd;
......@@ -2936,10 +2928,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
ctx->sum_eobs = 0;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
accumulate_eobs, ctx);
if (above_in_image) {
context_size = xd->above_mi->sb_type;
if (context_size < bsize)
......@@ -2980,7 +2968,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) *
sd[3]) +
clf[4] * (((double)ctx->sum_eobs - mean[4]) / sd[4]) +
clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) +
clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
if (score < 0) {
......
......@@ -742,9 +742,11 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
// TODO(jingning): temporarily enabled only for luma component
rd = VPXMIN(rd1, rd2);
if (plane == 0)
if (plane == 0) {
x->zcoeff_blk[tx_size][block] =
!x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless);
x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block];
}
args->this_rate += rate;
args->this_dist += dist;
......@@ -3190,6 +3192,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
ref_frame = vp9_mode_order[mode_index].ref_frame[0];
second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
vp9_zero(x->sum_y_eobs);
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
if (midx == mode_skip_start && best_mode_index >= 0) {
......@@ -3469,6 +3473,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size],
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size];
// TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history
......@@ -3699,6 +3704,8 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
mi->mv[0].as_int = 0;
x->skip = 1;
ctx->sum_y_eobs = 0;
if (cm->interp_filter != BILINEAR) {
best_filter = EIGHTTAP;
if (cm->interp_filter == SWITCHABLE &&
......@@ -3853,6 +3860,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
ref_frame = vp9_ref_order[ref_index].ref_frame[0];
second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
vp9_zero(x->sum_y_eobs);
#if CONFIG_BETTER_HW_COMPATIBILITY
// forbid 8X4 and 4X8 partitions if any reference frame is scaled.
if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
......@@ -4069,6 +4078,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
for (i = 0; i < 4; i++) {
tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i];
}
pred_exists = 1;
if (switchable_filter_index == 0 && sf->use_rd_breakout &&
......@@ -4233,6 +4243,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4];
for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];
......
......@@ -73,10 +73,9 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
// Currently, the machine-learning based partition search early termination
// is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
// TODO(yunqingwang): Re-enable when test failures are fixed.
// if (VPXMIN(cm->width, cm->height) >= 480) {
// sf->ml_partition_search_early_termination = 1;
// }
if (VPXMIN(cm->width, cm->height) >= 480) {
sf->ml_partition_search_early_termination = 1;
}
if (speed >= 1) {
sf->ml_partition_search_early_termination = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment