Commit 4a88ad89 authored by Jingning Han

Extend left/above partition context to per mi(8x8)

Update and buffer the left/above partition context information per 8x8
block. This allows recursive partitioning to be enabled further, down to
4x4 block size, and hence allows I4X4_PRED and SPLITMV to be deprecated.

This commit also fixes a context buffer swap/restore issue in the 32x32
partition type search. This gives a 0.1% performance gain for derf/yt.
A follow-up change will refactor the superblock partition type search
into recursive form.

Change-Id: Ib61975aca5f12b78d8018481d7fa1393d085689b
parent e8747866
......@@ -70,7 +70,7 @@ void vp9_free_frame_buffers(VP9_COMMON *oci) {
}
int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
int i, mb_cols;
int i, mi_cols;
// Our internal buffers are always multiples of 16
const int aligned_width = multiple16(width);
......@@ -140,19 +140,19 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
// FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
// information is exposed at this level
mb_cols = mb_cols_aligned_to_sb(oci);
oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 12 * mb_cols, 1);
mi_cols = mi_cols_aligned_to_sb(oci);
oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 6 * mi_cols, 1);
if (!oci->above_context[0]) {
vp9_free_frame_buffers(oci);
return 1;
}
oci->above_context[1] =
oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols;
oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
oci->above_context[2] =
oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols;
oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
oci->above_seg_context =
vpx_calloc(sizeof(PARTITION_CONTEXT) * mb_cols_aligned_to_sb(oci), 1);
vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
if (!oci->above_seg_context) {
vp9_free_frame_buffers(oci);
......
......@@ -413,7 +413,7 @@ typedef struct macroblockd {
static INLINE void update_partition_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type,
BLOCK_SIZE_TYPE sb_size) {
int bsl = mi_width_log2(sb_size), bs;
int bsl = mi_width_log2(sb_size), bs = 1 << bsl;
int bwl = mi_width_log2(sb_type);
int bhl = mi_height_log2(sb_type);
int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
......@@ -422,8 +422,6 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
if (bsl == 0)
return;
bs = 1 << (bsl - 1);
// update the partition context at the end nodes. set partition bits
// of block sizes larger than the current one to be one, and partition
// bits of smaller block sizes to be zero.
......@@ -454,18 +452,14 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
static INLINE int partition_plane_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type) {
int bsl = mi_width_log2(sb_type), bs;
int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
int above = 0, left = 0, i;
int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
bs = 1 << (bsl - 1);
assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
assert(bsl >= 0);
assert(boffset >= 0);
bs = 1 << (bsl - 1);
for (i = 0; i < bs; i++)
above |= (xd->above_seg_context[i] & (1 << boffset));
for (i = 0; i < bs; i++)
......
......@@ -18,6 +18,8 @@
#define MI_SIZE (1 << LOG2_MI_SIZE)
#define MI_UV_SIZE (1 << (LOG2_MI_SIZE - 1))
#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1)
typedef enum BLOCK_SIZE_TYPE {
BLOCK_SIZE_AB4X4,
#if CONFIG_AB4X4
......
......@@ -217,7 +217,7 @@ typedef struct VP9Common {
// partition contexts
PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[4];
PARTITION_CONTEXT left_seg_context[8];
/* keyframe block modes are predicted by their above, left neighbors */
......@@ -297,8 +297,8 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
buf[new_idx]++;
}
static int mb_cols_aligned_to_sb(VP9_COMMON *cm) {
return (cm->mb_cols + 3) & ~3;
static int mi_cols_aligned_to_sb(VP9_COMMON *cm) {
return 2 * ((cm->mb_cols + 3) & ~3);
}
static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd,
......
......@@ -372,8 +372,8 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize,
xd->plane[i].left_context = cm->left_context[i] +
(((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
}
xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
xd->above_seg_context = cm->above_seg_context + mi_col;
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
// Distance of Mb to the various image edges. These are specified to 8th pel
// as they are always compared to values that are in 1/8th pel units
......@@ -443,9 +443,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
if (bsize > BLOCK_SIZE_SB8X8) {
int pl;
// read the partition information
xd->left_seg_context =
pc->left_seg_context + ((mi_row >> 1) & 3);
xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK);
xd->above_seg_context = pc->above_seg_context + mi_col;
pl = partition_plane_context(xd, bsize);
partition = treed_read(r, vp9_partition_tree,
pc->fc.partition_prob[pl]);
......@@ -486,8 +485,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
return;
xd->left_seg_context = pc->left_seg_context + ((mi_row >> 1) & 3);
xd->above_seg_context = pc->above_seg_context + (mi_col >> 1);
xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK);
xd->above_seg_context = pc->above_seg_context + mi_col;
update_partition_context(xd, subsize, bsize);
}
......@@ -849,11 +848,11 @@ static void decode_tiles(VP9D_COMP *pbi,
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 *
MAX_MB_PLANE * mb_cols_aligned_to_sb(pc));
vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
MAX_MB_PLANE * mi_cols_aligned_to_sb(pc));
vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
mb_cols_aligned_to_sb(pc));
mi_cols_aligned_to_sb(pc));
if (pbi->oxcf.inv_tile_order) {
const int n_cols = pc->tile_columns;
......
......@@ -878,9 +878,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
if (bsize > BLOCK_SIZE_SB8X8) {
int pl;
xd->left_seg_context =
cm->left_seg_context + ((mi_row >> 1) & 3);
xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
xd->above_seg_context = cm->above_seg_context + mi_col;
pl = partition_plane_context(xd, bsize);
// encode the partition information
write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl],
......@@ -918,8 +917,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
return;
xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
xd->above_seg_context = cm->above_seg_context + mi_col;
update_partition_context(xd, subsize, bsize);
}
......@@ -932,7 +931,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
m_ptr += c->cur_tile_mi_col_start + c->cur_tile_mi_row_start * mis;
vpx_memset(c->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
mb_cols_aligned_to_sb(c));
mi_cols_aligned_to_sb(c));
for (mi_row = c->cur_tile_mi_row_start;
mi_row < c->cur_tile_mi_row_end;
......
......@@ -544,8 +544,8 @@ static INLINE void set_partition_seg_context(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
xd->above_seg_context = cm->above_seg_context + mi_col;
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
}
static void set_offsets(VP9_COMP *cpi,
......@@ -878,7 +878,7 @@ static void encode_sb_row(VP9_COMP *cpi,
int sb64_rate = 0, sb64_dist = 0;
int sb64_skip = 0;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT seg_l[4], seg_a[4];
PARTITION_CONTEXT seg_l[64 / MI_SIZE], seg_a[64 / MI_SIZE];
TOKENEXTRA *tp_orig = *tp;
for (p = 0; p < MAX_MB_PLANE; p++) {
......@@ -888,9 +888,8 @@ static void encode_sb_row(VP9_COMP *cpi,
memcpy(l + 16 * p, cm->left_context[p],
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
}
memcpy(&seg_a, cm->above_seg_context + (mi_col >> 1),
sizeof(seg_a));
memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
vpx_memcpy(&seg_a, cm->above_seg_context + mi_col, sizeof(seg_a));
vpx_memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
// FIXME(rbultje): this function should probably be rewritten to be
// recursive at some point in the future.
......@@ -902,6 +901,7 @@ static void encode_sb_row(VP9_COMP *cpi,
int sb32_skip = 0;
int j;
ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE];
PARTITION_CONTEXT sl32[32 / MI_SIZE], sa32[32 / MI_SIZE];
sb_partitioning[i] = BLOCK_SIZE_MB16X16;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
......@@ -920,6 +920,8 @@ static void encode_sb_row(VP9_COMP *cpi,
((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
}
vpx_memcpy(&sa32, cm->above_seg_context + mi_col + x_idx, sizeof(sa32));
vpx_memcpy(&sl32, cm->left_seg_context + y_idx, sizeof(sl32));
/* Encode MBs in raster order within the SB */
for (j = 0; j < 4; j++) {
......@@ -928,6 +930,7 @@ static void encode_sb_row(VP9_COMP *cpi,
int r, d;
int r2, d2, mb16_rate = 0, mb16_dist = 0, k;
ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE];
PARTITION_CONTEXT sl16[16 / MI_SIZE], sa16[16 / MI_SIZE];
mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
......@@ -950,6 +953,9 @@ static void encode_sb_row(VP9_COMP *cpi,
((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
}
vpx_memcpy(&sa16, cm->above_seg_context + mi_col + x_idx_m,
sizeof(sa16));
vpx_memcpy(&sl16, cm->left_seg_context + y_idx_m, sizeof(sl16));
for (k = 0; k < 4; k++) {
xd->b_index = k;
......@@ -983,6 +989,9 @@ static void encode_sb_row(VP9_COMP *cpi,
a3 + 4 * p,
sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
}
vpx_memcpy(cm->above_seg_context + mi_col + x_idx_m,
sa16, sizeof(sa16));
vpx_memcpy(cm->left_seg_context + y_idx_m, sl16, sizeof(sl16));
// try 8x16 coding
r2 = 0;
......@@ -1102,6 +1111,9 @@ static void encode_sb_row(VP9_COMP *cpi,
a2 + 8 * p,
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
}
// restore partition information context
vpx_memcpy(cm->above_seg_context + mi_col + x_idx, sa32, sizeof(sa32));
vpx_memcpy(cm->left_seg_context + y_idx, sl32, sizeof(sl32));
set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx);
pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
......@@ -1258,8 +1270,7 @@ static void encode_sb_row(VP9_COMP *cpi,
memcpy(cm->left_context[p], l + 16 * p,
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
}
memcpy(cm->above_seg_context + (mi_col >> 1), &seg_a,
sizeof(seg_a));
memcpy(cm->above_seg_context + mi_col, &seg_a, sizeof(seg_a));
memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l));
set_partition_seg_context(cpi, mi_row, mi_col);
......@@ -1428,10 +1439,10 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 *
MAX_MB_PLANE * mb_cols_aligned_to_sb(cm));
vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));
vpx_memset(cm->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
mb_cols_aligned_to_sb(cm));
mi_cols_aligned_to_sb(cm));
}
static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment