Commit e890c257 authored by Jim Bankoski's avatar Jim Bankoski

add a context tree structure to encoder

This patch sets up a quad_tree structure (pc_tree) for holding all of
pick_mode_context data we use at any square block size during encoding
or picking modes.  That includes contexts for 2 horizontal and 2 vertical
splits, one none, and pointers to 4 sub pc_tree nodes corresponding
to split.  It also includes a pointer to the current chosen partitioning.

This replaces code that held an index for every level in the pick
modes array including:  sb_index, mb_index,
b_index, ab_index.

These were used as stateful indexes that pointed to the current pick mode
contexts you had at each level stored in the following arrays

array ab4x4_context[][][],
sb8x4_context[][][], sb4x8_context[][][], sb8x8_context[][][],
sb8x16_context[][][], sb16x8_context[][][], mb_context[][], sb32x16[][],
sb16x32[],  sb32_context[], sb32x64_context[], sb64x32_context[],
sb64_context

and the partitioning that had been stored in the following:
b_partitioning, mb_partitioning, sb_partitioning, and sb64_partitioning.

Prior to this patch before doing an encode you had to set the appropriate
index for your block size ( switch statement),  update it ( up to 3
lookups for the index array value) and then make your call into a recursive
function at which point you'd have to call get_context which then
had to do a switch statement based on the blocksize,  and then up to 3
lookups based upon the block size to find the context to use.

With the new code the context for the block size is passed around directly
avoiding the extraneous switch statements and multi dimensional array
look ups that were listed above.   At any level in the search all of the
contexts are local to the pc_tree you are working on (in?).

In addition in most places code that used to call sub functions and
then check if the block size was 4x4 and index was > 0 and return
now don't preferring instead to call the right none function on the inside.



Change-Id: I06e39318269d9af2ce37961b3f95e181b57f5ed9
parent 68dcddee
......@@ -79,6 +79,18 @@ struct macroblock_plane {
// Zbin Over Quant value
int16_t zbin_extra;
};
typedef struct PC_TREE {
int index;
PARTITION_TYPE partitioning;
BLOCK_SIZE block_size;
PICK_MODE_CONTEXT none;
PICK_MODE_CONTEXT horizontal[2];
PICK_MODE_CONTEXT vertical[2];
union {
struct PC_TREE *split[4];
PICK_MODE_CONTEXT *leaf_split[4];
};
} PC_TREE;
/* The [2] dimension is for whether we skip the EOB node (i.e. if previous
* coefficient in this block was zero) or not. */
......@@ -133,11 +145,6 @@ struct macroblock {
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
unsigned char sb_index; // index of 32x32 block inside the 64x64 block
unsigned char mb_index; // index of 16x16 block inside the 32x32 block
unsigned char b_index; // index of 8x8 block inside the 16x16 block
unsigned char ab_index; // index of 4x4 block inside the 8x8 block
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
int mv_col_min;
......@@ -164,69 +171,14 @@ struct macroblock {
// Used to store sub partition's choices.
int_mv pred_mv[MAX_REF_FRAMES];
// TODO(jingning): Need to refactor the structure arrays that buffers the
// coding mode decisions of each partition type.
PICK_MODE_CONTEXT ab4x4_context[4][4][4];
PICK_MODE_CONTEXT sb8x4_context[4][4][4];
PICK_MODE_CONTEXT sb4x8_context[4][4][4];
PICK_MODE_CONTEXT sb8x8_context[4][4][4];
PICK_MODE_CONTEXT sb8x16_context[4][4][2];
PICK_MODE_CONTEXT sb16x8_context[4][4][2];
PICK_MODE_CONTEXT mb_context[4][4];
PICK_MODE_CONTEXT sb32x16_context[4][2];
PICK_MODE_CONTEXT sb16x32_context[4][2];
// when 4 MBs share coding parameters:
PICK_MODE_CONTEXT sb32_context[4];
PICK_MODE_CONTEXT sb32x64_context[2];
PICK_MODE_CONTEXT sb64x32_context[2];
PICK_MODE_CONTEXT sb64_context;
PICK_MODE_CONTEXT *leaf_tree;
PC_TREE *pc_tree;
PC_TREE *pc_root;
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
BLOCK_SIZE b_partitioning[4][4][4];
BLOCK_SIZE mb_partitioning[4][4];
BLOCK_SIZE sb_partitioning[4];
BLOCK_SIZE sb64_partitioning;
void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
};
// TODO(jingning): the variables used here are little complicated. need further
// refactoring on organizing the temporary buffers, when recursive
// partition down to 4x4 block size is enabled.
static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
BLOCK_SIZE bsize) {
switch (bsize) {
case BLOCK_64X64:
return &x->sb64_context;
case BLOCK_64X32:
return &x->sb64x32_context[x->sb_index];
case BLOCK_32X64:
return &x->sb32x64_context[x->sb_index];
case BLOCK_32X32:
return &x->sb32_context[x->sb_index];
case BLOCK_32X16:
return &x->sb32x16_context[x->sb_index][x->mb_index];
case BLOCK_16X32:
return &x->sb16x32_context[x->sb_index][x->mb_index];
case BLOCK_16X16:
return &x->mb_context[x->sb_index][x->mb_index];
case BLOCK_16X8:
return &x->sb16x8_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_8X16:
return &x->sb8x16_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_8X8:
return &x->sb8x8_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_8X4:
return &x->sb8x4_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_4X8:
return &x->sb4x8_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_4X4:
return &x->ab4x4_context[x->sb_index][x->mb_index][x->b_index];
default:
assert(0);
return NULL;
}
}
#ifdef __cplusplus
} // extern "C"
......
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/encoder/vp9_context_tree.h"
static const BLOCK_SIZE square[] = {
BLOCK_8X8,
BLOCK_16X16,
BLOCK_32X32,
BLOCK_64X64,
};
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
PICK_MODE_CONTEXT *ctx) {
const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
const int num_pix = num_blk << 4;
int i, k;
ctx->num_4x4_blk = num_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
vpx_memalign(16, num_pix * sizeof(uint16_t)));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
}
}
}
static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
int i, k;
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0;
vpx_free(ctx->qcoeff[i][k]);
ctx->qcoeff[i][k] = 0;
vpx_free(ctx->dqcoeff[i][k]);
ctx->dqcoeff[i][k] = 0;
vpx_free(ctx->eobs[i][k]);
ctx->eobs[i][k] = 0;
}
}
}
static void free_tree_contexts(PC_TREE *this_pc) {
free_mode_context(&this_pc->none);
free_mode_context(&this_pc->horizontal[0]);
free_mode_context(&this_pc->horizontal[1]);
free_mode_context(&this_pc->vertical[0]);
free_mode_context(&this_pc->vertical[1]);
}
static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *this_pc,
int num_4x4_blk) {
alloc_mode_context(cm, num_4x4_blk, &this_pc->none);
alloc_mode_context(cm, num_4x4_blk/2, &this_pc->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk/2, &this_pc->vertical[0]);
/* TODO(Jbb): for 4x8 and 8x4 these allocated values are not used.
* Figure out a better way to do this. */
alloc_mode_context(cm, num_4x4_blk/2, &this_pc->horizontal[1]);
alloc_mode_context(cm, num_4x4_blk/2, &this_pc->vertical[1]);
}
// This function sets up a tree of contexts such that at each square
// partition level. There are contexts for none, horizontal, vertical, and
// split. Along with a block_size value and a selected block_size which
// represents the state of our search.
void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) {
int i, j;
const int leaf_nodes = 64;
const int tree_nodes = 64 + 16 + 4 + 1;
int pc_tree_index = 0;
PC_TREE *this_pc;
PICK_MODE_CONTEXT *this_leaf;
int square_index = 1;
int nodes;
vpx_free(x->leaf_tree);
CHECK_MEM_ERROR(cm, x->leaf_tree, vpx_calloc(leaf_nodes,
sizeof(PICK_MODE_CONTEXT)));
vpx_free(x->pc_tree);
CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(PC_TREE)));
this_pc = &x->pc_tree[0];
this_leaf = &x->leaf_tree[0];
// 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same
// context so we only need to allocate 1 for each 8x8 block.
for (i = 0; i < leaf_nodes; ++i)
alloc_mode_context(cm, 1, &x->leaf_tree[i]);
// Sets up all the leaf nodes in the tree.
for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
x->pc_tree[pc_tree_index].block_size = square[0];
alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index], 4);
x->pc_tree[pc_tree_index].leaf_split[0] = this_leaf++;
for (j = 1; j < 4; j++) {
x->pc_tree[pc_tree_index].leaf_split[j] =
x->pc_tree[pc_tree_index].leaf_split[0];
}
}
// Each node has 4 leaf nodes, fill each block_size level of the tree
// from leafs to the root.
for (nodes = 16; nodes > 0; nodes >>= 2, ++square_index) {
for (i = 0; i < nodes; ++pc_tree_index, ++i) {
alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index],
4 << (2 * square_index));
x->pc_tree[pc_tree_index].block_size = square[square_index];
for (j = 0; j < 4; j++) {
x->pc_tree[pc_tree_index].split[j] = this_pc++;
}
}
}
x->pc_root = &x->pc_tree[tree_nodes-1];
x->pc_root[0].none.best_mode_index = 2;
}
void vp9_free_pc_tree(MACROBLOCK *m) {
const int tree_nodes = 64 + 16 + 4 + 1;
int i;
// Set up all 4x4 mode contexts
for (i = 0; i < 64; ++i)
free_mode_context(&m->leaf_tree[i]);
// Sets up all the leaf nodes in the tree.
for (i = 0; i < tree_nodes; i++) {
free_tree_contexts(&m->pc_tree[i]);
}
vpx_free(m->pc_tree);
m->pc_tree = 0;
vpx_free(m->leaf_tree);
m->leaf_tree = 0;
}
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_
#define VP9_ENCODER_VP9_CONTEXT_TREE_H_
#include "vp9/encoder/vp9_onyx_int.h"
void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x);
void vp9_free_pc_tree(MACROBLOCK *x);
#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */
This diff is collapsed.
......@@ -474,7 +474,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
TileInfo tile;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
const PICK_MODE_CONTEXT *ctx = &x->sb64_context;
const PICK_MODE_CONTEXT *ctx = &x->pc_root->none;
int i;
int recon_yoffset, recon_uvoffset;
......
......@@ -31,6 +31,7 @@
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_bitstream.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_firstpass.h"
......@@ -179,6 +180,8 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->tok);
cpi->tok = 0;
vp9_free_pc_tree(&cpi->mb);
for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
vpx_free(lc->rc_twopass_stats_in.buf);
......@@ -563,6 +566,8 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
}
vp9_setup_pc_tree(&cpi->common, &cpi->mb);
}
......@@ -880,124 +885,6 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
} while (++i <= MV_MAX);
}
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
PICK_MODE_CONTEXT *ctx) {
int num_pix = num_4x4_blk << 4;
int i, k;
ctx->num_4x4_blk = num_4x4_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
vpx_memalign(16, num_pix * sizeof(uint16_t)));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
}
}
}
static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
int i, k;
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0;
vpx_free(ctx->qcoeff[i][k]);
ctx->qcoeff[i][k] = 0;
vpx_free(ctx->dqcoeff[i][k]);
ctx->dqcoeff[i][k] = 0;
vpx_free(ctx->eobs[i][k]);
ctx->eobs[i][k] = 0;
}
}
}
static void init_pick_mode_context(VP9_COMP *cpi) {
int i;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
for (i = 0; i < BLOCK_SIZES; ++i) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
const int num_4x4_h = num_4x4_blocks_high_lookup[i];
const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
if (i < BLOCK_16X16) {
for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
alloc_mode_context(cm, num_4x4_blk, ctx);
}
}
}
} else if (i < BLOCK_32X32) {
for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
alloc_mode_context(cm, num_4x4_blk, ctx);
}
}
} else if (i < BLOCK_64X64) {
for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
alloc_mode_context(cm, num_4x4_blk, ctx);
}
} else {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
alloc_mode_context(cm, num_4x4_blk, ctx);
}
}
}
static void free_pick_mode_context(MACROBLOCK *x) {
int i;
for (i = 0; i < BLOCK_SIZES; ++i) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
const int num_4x4_h = num_4x4_blocks_high_lookup[i];
const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
if (i < BLOCK_16X16) {
for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
free_mode_context(ctx);
}
}
}
} else if (i < BLOCK_32X32) {
for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
free_mode_context(ctx);
}
}
} else if (i < BLOCK_64X64) {
for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
free_mode_context(ctx);
}
} else {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
free_mode_context(ctx);
}
}
}
VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
int i, j;
......@@ -1026,7 +913,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
init_config(cpi, oxcf);
vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc);
init_pick_mode_context(cpi);
cm->current_video_frame = 0;
......@@ -1418,7 +1304,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
#endif
}
free_pick_mode_context(&cpi->mb);
dealloc_compressor_data(cpi);
vpx_free(cpi->mb.ss);
vpx_free(cpi->tok);
......
......@@ -18,6 +18,8 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_CX_SRCS-yes += vp9_cx_iface.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
VP9_CX_SRCS-yes += encoder/vp9_cost.h
VP9_CX_SRCS-yes += encoder/vp9_cost.c
VP9_CX_SRCS-yes += encoder/vp9_dct.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment