Commit c06d56cc authored by Scott LaVarnway

VP9: Move ref_mvs[][] and mode_context[] from MB_MODE_INFO to MB_MODE_INFO_EXT

This saves 36 bytes per 8x8 area for both the decoder and the encoder.
(The encoder has two MODE_INFO buffers.)

Change-Id: If006abb2224acaf326df3c2be09e77e967662107
parent 437d033d
......@@ -81,12 +81,6 @@ typedef struct {
// TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
int_mv mv[2];
#if CONFIG_VP9_ENCODER
// TODO(slavarnway): Move to encoder
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
uint8_t mode_context[MAX_REF_FRAMES];
#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
......
......@@ -242,6 +242,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
const MACROBLOCKD *const xd = &x->e_mbd;
const struct segmentation *const seg = &cm->seg;
const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const PREDICTION_MODE mode = mbmi->mode;
const int segment_id = mbmi->segment_id;
const BLOCK_SIZE bsize = mbmi->sb_type;
......@@ -288,7 +289,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
} else {
const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
const vp9_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
write_ref_frames(cm, xd, w);
......@@ -321,7 +322,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
if (b_mode == NEWMV) {
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
&mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
&mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
nmvc, allow_hp);
}
}
......@@ -330,7 +331,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
if (mode == NEWMV) {
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
&mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
&mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
allow_hp);
}
}
......@@ -384,6 +385,9 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
cpi->td.mb.mbmi_ext = cpi->td.mb.mbmi_ext_base +
(mi_row * cm->mi_cols + mi_col);
set_mi_row_col(xd, tile,
mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type],
mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type],
......
......@@ -47,11 +47,18 @@ struct macroblock_plane {
typedef unsigned int vp9_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2]
[COEFF_CONTEXTS][ENTROPY_TOKENS];
// Extension of MB_MODE_INFO that carries the motion-vector reference data
// used by the encoder when costing and writing inter modes.
typedef struct {
// Candidate reference motion vectors, indexed first by reference frame and
// then by candidate. Entry [ref][0] is the predictor that coded motion
// vectors are differenced against.
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
// Per-reference-frame context used to index the inter mode probability and
// cost tables.
uint8_t mode_context[MAX_REF_FRAMES];
} MB_MODE_INFO_EXT;
typedef struct macroblock MACROBLOCK;
struct macroblock {
struct macroblock_plane plane[MAX_MB_PLANE];
MACROBLOCKD e_mbd;
MB_MODE_INFO_EXT *mbmi_ext;
MB_MODE_INFO_EXT *mbmi_ext_base;
int skip_block;
int select_tx_size;
int skip_recode;
......
......@@ -12,6 +12,7 @@
#define VP9_ENCODER_VP9_CONTEXT_TREE_H_
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_block.h"
struct VP9_COMP;
struct VP9Common;
......@@ -20,6 +21,7 @@ struct ThreadData;
// Structure to hold snapshot of coding context during the mode picking process
typedef struct {
MODE_INFO mic;
MB_MODE_INFO_EXT mbmi_ext;
uint8_t *zcoeff_blk;
tran_low_t *coeff[MAX_MB_PLANE][3];
tran_low_t *qcoeff[MAX_MB_PLANE][3];
......
This diff is collapsed.
......@@ -229,12 +229,13 @@ void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2],
static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
const int_mv mvs[2],
nmv_context_counts *counts) {
int i;
for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
const MV *ref = &mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
vp9_inc_mv(&diff, counts);
......@@ -245,6 +246,7 @@ void vp9_update_mv_count(ThreadData *td) {
const MACROBLOCKD *xd = &td->mb.e_mbd;
const MODE_INFO *mi = xd->mi[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *mbmi_ext = td->mb.mbmi_ext;
if (mbmi->sb_type < BLOCK_8X8) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
......@@ -255,12 +257,12 @@ void vp9_update_mv_count(ThreadData *td) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int i = idy * 2 + idx;
if (mi->bmi[i].as_mode == NEWMV)
inc_mvs(mbmi, mi->bmi[i].as_mv, &td->counts->mv);
inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv);
}
}
} else {
if (mbmi->mode == NEWMV)
inc_mvs(mbmi, mbmi->mv, &td->counts->mv);
inc_mvs(mbmi, mbmi_ext, mbmi->mv, &td->counts->mv);
}
}
......@@ -335,6 +335,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
int i;
vpx_free(cpi->mbmi_ext_base);
cpi->mbmi_ext_base = NULL;
vpx_free(cpi->tile_data);
cpi->tile_data = NULL;
......@@ -670,11 +673,25 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
"Failed to allocate scaled last source buffer");
}
// Allocates the MB_MODE_INFO_EXT array through vpx_calloc, sized as one entry
// per mode-info unit (mi_rows * mi_cols), and stores it in cpi->mbmi_ext_base.
// Returns 0 on success and 1 when the allocation fails.
static int alloc_context_buffers_ext(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  const int num_mi_units = cm->mi_rows * cm->mi_cols;

  cpi->mbmi_ext_base = vpx_calloc(num_mi_units, sizeof(*cpi->mbmi_ext_base));
  return cpi->mbmi_ext_base ? 0 : 1;
}
void vp9_alloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
vp9_alloc_context_buffers(cm, cm->width, cm->height);
alloc_context_buffers_ext(cpi);
vpx_free(cpi->tile_tok[0][0]);
{
......@@ -716,6 +733,9 @@ static void update_frame_size(VP9_COMP *cpi) {
vp9_set_mb_mi(cm, cm->width, cm->height);
vp9_init_context_buffers(cm);
init_macroblockd(cm, xd);
cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
memset(cpi->mbmi_ext_base, 0,
cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
set_tile_limits(cpi);
......
......@@ -291,6 +291,7 @@ typedef struct IMAGE_STAT {
typedef struct VP9_COMP {
QUANTS quants;
ThreadData td;
MB_MODE_INFO_EXT *mbmi_ext_base;
DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);
VP9_COMMON common;
......
......@@ -39,7 +39,8 @@ typedef struct {
int in_use;
} PRED_BUFFER;
static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd,
static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
......@@ -111,7 +112,7 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd,
Done:
mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter];
// Clamp vectors
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
......@@ -131,7 +132,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
const int sadpb = x->sadperbit16;
MV mvp_full;
const int ref = mbmi->ref_frame[0];
const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
int dis;
int rate_mode;
const int tmp_col_min = x->mv_col_min;
......@@ -155,7 +156,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
assert(x->mv_best_ref_index[ref] <= 2);
if (x->mv_best_ref_index[ref] < 2)
mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
else
mvp_full = x->pred_mv[ref];
......@@ -178,7 +179,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
*rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref]]
rate_mode = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]]
[INTER_OFFSET(NEWMV)];
rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) >
best_rd_sofar);
......@@ -776,7 +777,6 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
struct buf_2d yv12_mb[][MAX_MB_PLANE],
int *rate, int64_t *dist) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
unsigned int var = var_y, sse = sse_y;
......@@ -850,7 +850,7 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
x->skip = 1;
// The cost of skip bit needs to be added.
*rate = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
*rate = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
[INTER_OFFSET(this_mode)];
// More on this part of rate
......@@ -1172,7 +1172,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[ZEROMV][ref_frame].as_int = 0;
if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
int_mv *const candidates = mbmi->ref_mvs[ref_frame];
int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
......@@ -1181,9 +1181,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cm->use_prev_frame_mvs)
vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
candidates, mi_row, mi_col, NULL, NULL,
xd->mi[0]->mbmi.mode_context);
x->mbmi_ext->mode_context);
else
const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
const_motion[ref_frame] = mv_refs_rt(cm, x, xd, tile_info,
xd->mi[0],
ref_frame, candidates,
mi_row, mi_col);
......@@ -1257,13 +1257,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
&mbmi->ref_mvs[ref_frame][0].as_mv,
&x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
&mbmi->ref_mvs[ref_frame][0].as_mv,
&x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
cpi->common.allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
......@@ -1426,7 +1426,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
this_rdc.rate += rate_mv;
this_rdc.rate +=
cpi->inter_mode_cost[mbmi->mode_context[ref_frame]][INTER_OFFSET(
cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]][INTER_OFFSET(
this_mode)];
this_rdc.rate += ref_frame_cost[ref_frame];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
......@@ -1629,6 +1629,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const struct segmentation *const seg = &cm->seg;
MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE;
MV_REFERENCE_FRAME best_ref_frame = NONE;
......@@ -1652,14 +1653,14 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
x->pred_mv_sad[ref_frame] = INT_MAX;
if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
int_mv *const candidates = mbmi->ref_mvs[ref_frame];
int_mv *const candidates = mbmi_ext->ref_mvs[ref_frame];
const struct scale_factors *const sf =
&cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
candidates, mi_row, mi_col, NULL, NULL,
xd->mi[0]->mbmi.mode_context);
mbmi_ext->mode_context);
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&dummy_mv[0], &dummy_mv[1]);
......@@ -1734,7 +1735,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
vp9_append_sub8x8_mvs_for_idx(cm, xd, i, 0, mi_row, mi_col,
&b_mv[NEARESTMV],
&b_mv[NEARMV],
xd->mi[0]->mbmi.mode_context);
mbmi_ext->mode_context);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int b_rate = 0;
......@@ -1759,12 +1760,12 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.col = xd->mi[0]->bmi[0].as_mv[0].as_mv.col >> 3;
}
vp9_set_mv_search_range(x, &mbmi->ref_mvs[0]->as_mv);
vp9_set_mv_search_range(x, &mbmi_ext->ref_mvs[0]->as_mv);
vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, x->sadperbit4,
cond_cost_list(cpi, cost_list),
&mbmi->ref_mvs[ref_frame][0].as_mv, &tmp_mv,
&mbmi_ext->ref_mvs[ref_frame][0].as_mv, &tmp_mv,
INT_MAX, 0);
x->mv_col_min = tmp_col_min;
......@@ -1777,17 +1778,17 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.col = tmp_mv.col * 8;
b_rate += vp9_mv_bit_cost(&mvp_full,
&mbmi->ref_mvs[ref_frame][0].as_mv,
&mbmi_ext->ref_mvs[ref_frame][0].as_mv,
x->nmvjointcost, x->mvcost,
MV_COST_WEIGHT);
b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
[INTER_OFFSET(NEWMV)];
if (RDCOST(x->rdmult, x->rddiv, b_rate, 0) > b_best_rd)
continue;
cpi->find_fractional_mv_step(x, &tmp_mv,
&mbmi->ref_mvs[ref_frame][0].as_mv,
&mbmi_ext->ref_mvs[ref_frame][0].as_mv,
cpi->common.allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
......@@ -1800,7 +1801,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;
} else {
b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
[INTER_OFFSET(this_mode)];
}
......@@ -1880,6 +1881,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
}
mbmi->mode = xd->mi[0]->bmi[3].as_mode;
ctx->mic = *(xd->mi[0]);
ctx->mbmi_ext = *x->mbmi_ext;
ctx->skip_txfm[0] = 0;
ctx->skip = 0;
// Dummy assignment for speed -5. No effect in speed -6.
......
......@@ -452,8 +452,6 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
int i;
int zero_seen = 0;
int best_index = 0;
......@@ -468,13 +466,14 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
block_size < x->max_partition_size);
MV pred_mv[3];
pred_mv[0] = mbmi->ref_mvs[ref_frame][0].as_mv;
pred_mv[1] = mbmi->ref_mvs[ref_frame][1].as_mv;
pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
pred_mv[2] = x->pred_mv[ref_frame];
assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
near_same_nearest =
mbmi->ref_mvs[ref_frame][0].as_int == mbmi->ref_mvs[ref_frame][1].as_int;
x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
// Get the sad for each candidate reference mv.
for (i = 0; i < num_mv_refs; ++i) {
const MV *this_mv = &pred_mv[i];
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment