Commit 4e66293f authored by Yunqing Wang

Use source frame difference to make partition decision

Calculate the difference variance between last source frame and
current source frame. The variance is calculated at 16x16 block
level. The variances are compared to several thresholds to decide
final partition sizes.

An adaptive strategy is implemented to decide whether to use
SOURCE_VAR_BASED_PARTITION or FIXED_PARTITION based on the amount of
motion in the video. The switching test is done once every
search_type_check_frequency frames.

The selection of source_var_thresh needs to be investigated
further later.

The RTC set Borg test showed a 0.424% overall PSNR gain and a 0.357%
SSIM gain. For clips with a large enough static area, the
encoding speedup is around 2% to 15%.

Change-Id: Id7d268f1d8cbca7fb8026aa4a53b3c77459dc156
parent 06159fcd
......@@ -380,6 +380,10 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
# 16x16 variance: prototype plus its listed specializations.
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
# 16x16 SSE/sum helper: unlike the variance functions it returns nothing and
# reports both the sse and the sum through output pointers. The sse2
# specialization is aliased to the existing vp9_get16x16var_sse2 symbol
# instead of naming a new function.
add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
specialize qw/vp9_get_sse_sum_16x16 sse2/;
$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
# 16x8 variance: prototype plus its listed specializations.
add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
......
......@@ -1392,6 +1392,126 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
}
}
// Row/column offsets of the sixteen 16x16 blocks inside one 64x64 superblock.
// Entries are grouped four at a time by the 32x32 quadrant that contains
// them, so entry (i * 4 + j) is the j-th 16x16 block of 32x32 quadrant i and
// entry (i * 4) is the top-left corner of that quadrant.  The offsets are
// used both to index the mode-info grid and, scaled by MI_SIZE, to address
// the luma buffers.
// The table is private to this file, so it is given internal linkage.
static const struct {
  int row;
  int col;
} coord_lookup[16] = {
  // 32x32 index = 0
  {0, 0}, {0, 2}, {2, 0}, {2, 2},
  // 32x32 index = 1
  {0, 4}, {0, 6}, {2, 4}, {2, 6},
  // 32x32 index = 2
  {4, 0}, {4, 2}, {6, 0}, {6, 2},
  // 32x32 index = 3
  {4, 4}, {4, 6}, {6, 4}, {6, 6},
};
// Picks the partitioning of the 64x64 superblock at (mi_row, mi_col) from the
// difference between the current source frame and the last source frame.
//
// Superblock fully inside the tile:
//   - every 16x16 block starts out as BLOCK_16X16 and gets its difference
//     sse/sum/variance measured against cpi->Last_Source;
//   - a 32x32 quadrant whose four 16x16 variances are all below
//     sf.source_var_thresh is promoted to BLOCK_32X32;
//   - if all four quadrants were promoted and each 32x32 variance is below
//     twice that threshold, the whole superblock becomes BLOCK_64X64.
// Superblock only partially inside the tile: falls back to 16x16 blocks,
// shrunk by find_partition_size() where they would not fit.
//
// Side effects: writes the mi_8x8 grid pointers and their mbmi.sb_type, and
// bumps cpi->use_large_partition_rate once per promoted 32x32 quadrant on
// frames where (current_video_frame - 1) is a multiple of
// sf.search_type_check_frequency.
static void set_source_var_based_partition(VP9_COMP *cpi,
                                           const TileInfo *const tile,
                                           MODE_INFO **mi_8x8,
                                           int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *x = &cpi->mb;
  const int mis = cm->mode_info_stride;
  int row8x8_remaining = tile->mi_row_end - mi_row;
  int col8x8_remaining = tile->mi_col_end - mi_col;
  int r, c;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // In-image SB64
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    const int src_stride = x->plane[0].src.stride;
    const int pre_stride = cpi->Last_Source->y_stride;
    const uint8_t *src = x->plane[0].src.buf;
    // Co-located position of this superblock in the last source frame.
    const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
                           (mi_col * MI_SIZE);
    const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
    const int thr_32x32 = cpi->sf.source_var_thresh;
    const int thr_64x64 = thr_32x32 << 1;
    int i, j;
    int index;
    // d32[i] is only written when quadrant i is promoted to 32x32; it is only
    // read below when use16x16 is still 0, i.e. when all four were written.
    diff d32[4];
    int use16x16 = 0;

    for (i = 0; i < 4; i++) {
      diff d16[4];

      for (j = 0; j < 4; j++) {
        int b_mi_row = coord_lookup[i * 4 + j].row;
        int b_mi_col = coord_lookup[i * 4 + j].col;
        int b_offset = b_mi_row * MI_SIZE * src_stride +
                       b_mi_col * MI_SIZE;
        // The last source frame must be stepped with its own stride; reusing
        // the current-frame offset would address the wrong rows whenever the
        // two buffers have different strides.
        int pre_b_offset = b_mi_row * MI_SIZE * pre_stride +
                           b_mi_col * MI_SIZE;

        vp9_get_sse_sum_16x16(src + b_offset,
                              src_stride,
                              pre_src + pre_b_offset,
                              pre_stride, &d16[j].sse, &d16[j].sum);

        // variance = sse - sum^2 / 256 (256 pixels in a 16x16 block).
        d16[j].var = d16[j].sse -
            (((uint32_t)d16[j].sum * d16[j].sum) >> 8);

        index = b_mi_row * mis + b_mi_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->mbmi.sb_type = BLOCK_16X16;

        // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
        // size to further improve quality.
      }

      if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 &&
          d16[2].var < thr_32x32 && d16[3].var < thr_32x32) {
        // Merge the four 16x16 statistics into one 32x32 statistic.
        d32[i].sse = d16[0].sse;
        d32[i].sum = d16[0].sum;

        for (j = 1; j < 4; j++) {
          d32[i].sse += d16[j].sse;
          d32[i].sum += d16[j].sum;
        }

        // variance = sse - sum^2 / 1024 (1024 pixels in a 32x32 block).
        d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10);

        index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->mbmi.sb_type = BLOCK_32X32;

        // Count promoted 32x32 blocks on the frames used for the periodic
        // search-type check.
        if (!((cm->current_video_frame - 1) %
            cpi->sf.search_type_check_frequency))
          cpi->use_large_partition_rate += 1;
      } else {
        use16x16 = 1;
      }
    }

    if (!use16x16) {
      if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 &&
          d32[2].var < thr_64x64 && d32[3].var < thr_64x64) {
        mi_8x8[0] = mi_upper_left;
        mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
      }
    }
  } else {   // partial in-image SB64
    BLOCK_SIZE bsize = BLOCK_16X16;
    int bh = num_8x8_blocks_high_lookup[bsize];
    int bw = num_8x8_blocks_wide_lookup[bsize];

    // bh/bw are passed to find_partition_size() by address, so the loop step
    // may change after each call.
    for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
      for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
        int index = r * mis + c;
        // Find a partition size that fits
        bsize = find_partition_size(bsize,
                                    (row8x8_remaining - r),
                                    (col8x8_remaining - c), &bh, &bw);
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->mbmi.sb_type = bsize;
      }
    }
  }
}
static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
const int mis = cm->mode_info_stride;
int block_row, block_col;
......@@ -3038,10 +3158,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
cpi->sf.always_this_block_size :
get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
BLOCK_SIZE bsize;
cpi->mb.source_variance = UINT_MAX;
vp9_zero(cpi->mb.pred_mv);
......@@ -3053,8 +3170,17 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist);
break;
case SOURCE_VAR_BASED_PARTITION:
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist);
break;
case VAR_BASED_FIXED_PARTITION:
case FIXED_PARTITION:
bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
cpi->sf.always_this_block_size :
get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist);
......@@ -3142,6 +3268,29 @@ static void encode_frame_internal(VP9_COMP *cpi) {
p[i].eobs = ctx->eobs_pbuf[i][0];
}
vp9_zero(x->zcoeff_blk);
if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION &&
cm->current_video_frame > 0) {
int check_freq = cpi->sf.search_type_check_frequency;
if ((cm->current_video_frame - 1) % check_freq == 0) {
cpi->use_large_partition_rate = 0;
}
if ((cm->current_video_frame - 1) % check_freq == 1) {
const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] -
b_width_log2_lookup[BLOCK_16X16]) +
(b_height_log2_lookup[BLOCK_32X32] -
b_height_log2_lookup[BLOCK_16X16]));
cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 *
mbs_in_b32x32 / cm->MBs;
}
if ((cm->current_video_frame - 1) % check_freq >= 1) {
if (cpi->use_large_partition_rate < 15)
cpi->sf.partition_search_type = FIXED_PARTITION;
}
}
}
{
......
......@@ -20,6 +20,12 @@ struct macroblock;
struct yv12_buffer_config;
struct VP9_COMP;
// Difference statistics for one block, measured between the current source
// frame and the last source frame.
typedef struct {
// Squared-error accumulator, as reported through the sse output of
// vp9_get_sse_sum_16x16 (or summed over several blocks).
unsigned int sse;
// Signed accumulator, as reported through the sum output of
// vp9_get_sse_sum_16x16 (or summed over several blocks).
int sum;
// Variance derived from the two fields above: sse - sum * sum / pixel count.
unsigned int var;
} diff;
void vp9_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src,
int mi_row, int mi_col);
......
......@@ -512,6 +512,8 @@ typedef struct VP9_COMP {
SVC svc;
int use_large_partition_rate;
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
int multi_arf_enabled;
......
......@@ -257,7 +257,11 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->allow_skip_recode = 0;
}
if (speed >= 6) {
sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->source_var_thresh = 360;
sf->use_nonrd_pick_mode = 1;
sf->search_method = FAST_DIAMOND;
}
......@@ -325,6 +329,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
sf->always_this_block_size = BLOCK_16X16;
sf->search_type_check_frequency = 50;
sf->source_var_thresh = 100;
switch (cpi->oxcf.mode) {
case MODE_BESTQUALITY:
......
......@@ -110,7 +110,10 @@ typedef enum {
// Use an arbitrary partitioning scheme based on source variance within
// a 64X64 SB
VAR_BASED_PARTITION
VAR_BASED_PARTITION,
// Use non-fixed partitions based on source variance
SOURCE_VAR_BASED_PARTITION
} PARTITION_SEARCH_TYPE;
typedef struct {
......@@ -305,6 +308,13 @@ typedef struct {
// used in inter frames.
// TODO(aconverse): Fold this into one of the other many mode skips
BLOCK_SIZE max_intra_bsize;
// The frequency that we check if SOURCE_VAR_BASED_PARTITION or
// FIXED_PARTITION search type should be used.
int search_type_check_frequency;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
int source_var_thresh;
} SPEED_FEATURES;
struct VP9_COMP;
......
......@@ -417,6 +417,12 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
return (var - (((int64_t)avg * avg) >> 10));
}
// C implementation of vp9_get_sse_sum_16x16.  Forwards to the generic
// variance() helper with 16 for both block-dimension arguments, handing the
// caller's sse and sum pointers straight through as its outputs.
void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
                             const uint8_t *ref_ptr, int ref_stride,
                             unsigned int *sse, int *sum) {
  const int block_dim = 16;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_dim, block_dim, sse, sum);
}
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment