Commit b41c17d6 authored by Attila Nagy's avatar Attila Nagy Committed by Yunqing Wang
Browse files

Share one set of RD cost tables between all encoding threads

The RD cost tables were local to the MACROBLOCK data and had to be copied
into each thread's MACROBLOCK data every time. The tables have been moved
to a common place, and only pointers are set up for each encoding thread.

vp8_cost_tokens() generates 'int' costs, so all cost table types were
changed to int (i.e. 'unsigned' was removed).

NOTE: Could do some more cleaning in vp8cx_init_mbrthread_data().

Change-Id: Ifa4de4c6286dffaca7ed3082041fe5af1345ddc0
parent 11876faa
......@@ -90,16 +90,17 @@ typedef struct macroblock
signed int act_zbin_adj;
signed int last_act_zbin_adj;
int mvcosts[2][MVvals+1];
int *mvcost[2];
int mvsadcosts[2][MVfpvals+1];
int *mvsadcost[2];
int mbmode_cost[2][MB_MODE_COUNT];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
unsigned int bmode_costs[10][10][10];
unsigned int inter_bmode_costs[B_MODE_COUNT];
// These define limits to motion vector components to prevent them from extending outside the UMV borders
int (*mbmode_cost)[MB_MODE_COUNT];
int (*intra_uv_mode_cost)[MB_MODE_COUNT];
int (*bmode_costs)[10][10];
int *inter_bmode_costs;
int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS];
// These define limits to motion vector components to prevent
// them from extending outside the UMV borders
int mv_col_min;
int mv_col_max;
int mv_row_min;
......@@ -115,7 +116,6 @@ typedef struct macroblock
unsigned char *active_ptr;
MV_CONTEXT *mvc;
unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
int optimize;
int q_index;
......
......@@ -337,21 +337,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->src.v_buffer = x->src.v_buffer;
*/
z->mvcost[0] = x->mvcost[0];
z->mvcost[1] = x->mvcost[1];
z->mvsadcost[0] = x->mvsadcost[0];
z->mvsadcost[1] = x->mvsadcost[1];
vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
z->mvcost[0] = &z->mvcosts[0][mv_max+1];
z->mvcost[1] = &z->mvcosts[1][mv_max+1];
z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1];
z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1];
vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs));
vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs));
//memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
//memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost));
vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost));
vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost));
vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs));
z->token_costs = x->token_costs;
z->inter_bmode_costs = x->inter_bmode_costs;
z->mbmode_cost = x->mbmode_cost;
z->intra_uv_mode_cost = x->intra_uv_mode_cost;
z->bmode_costs = x->bmode_costs;
for (i = 0; i < 25; i++)
{
......@@ -359,17 +354,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->block[i].quant_fast = x->block[i].quant_fast;
z->block[i].quant_shift = x->block[i].quant_shift;
z->block[i].zbin = x->block[i].zbin;
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
z->block[i].round = x->block[i].round;
z->q_index = x->q_index;
z->act_zbin_adj = x->act_zbin_adj;
z->last_act_zbin_adj = x->last_act_zbin_adj;
/*
z->block[i].src = x->block[i].src;
*/
z->block[i].src_stride = x->block[i].src_stride;
z->block[i].src_stride = x->block[i].src_stride;
}
z->q_index = x->q_index;
z->act_zbin_adj = x->act_zbin_adj;
z->last_act_zbin_adj = x->last_act_zbin_adj;
{
MACROBLOCKD *xd = &x->e_mbd;
MACROBLOCKD *zd = &z->e_mbd;
......@@ -401,9 +394,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
zd->segmentation_enabled = xd->segmentation_enabled;
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data,
sizeof(xd->segment_feature_data));
vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc,
sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
......
......@@ -18,6 +18,8 @@
void vp8_init_mode_costs(VP8_COMP *c)
{
VP8_COMMON *x = &c->common;
struct rd_costs_struct *rd_costs = &c->rd_costs;
{
const vp8_tree_p T = vp8_bmode_tree;
......@@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c)
do
{
vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], vp8_kf_bmode_prob[i][j], T);
vp8_cost_tokens(rd_costs->bmode_costs[i][j],
vp8_kf_bmode_prob[i][j], T);
}
while (++j < VP8_BINTRAMODES);
}
while (++i < VP8_BINTRAMODES);
vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T);
vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T);
}
vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree);
vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob,
vp8_sub_mv_ref_tree);
vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
vp8_cost_tokens(c->mb.mbmode_cost[0], vp8_kf_ymode_prob, vp8_kf_ymode_tree);
vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob,
vp8_kf_ymode_tree);
vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree);
vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, vp8_uv_mode_tree);
vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob,
vp8_uv_mode_tree);
vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob,
vp8_uv_mode_tree);
}
......@@ -1883,13 +1883,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->gf_rate_correction_factor = 1.0;
cpi->twopass.est_max_qcorrection_factor = 1.0;
cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1];
cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1];
cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1];
cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1];
cal_mvsadcosts(cpi->mb.mvsadcost);
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
......@@ -2023,13 +2016,29 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->common.error.setjmp = 0;
#if CONFIG_MULTI_RES_ENCODING
/* Calculate # of MBs in a row in lower-resolution level image. */
if (cpi->oxcf.mr_encoder_id > 0)
vp8_cal_low_res_mb_cols(cpi);
#endif
return cpi;
/* setup RD costs to MACROBLOCK struct */
cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1];
cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1];
cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1];
cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1];
cal_mvsadcosts(cpi->mb.mvsadcost);
cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost;
cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost;
cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs;
cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs;
cpi->mb.token_costs = cpi->rd_costs.token_costs;
return cpi;
}
......
......@@ -693,6 +693,17 @@ typedef struct VP8_COMP
int mr_low_res_mb_cols;
#endif
/*
 * Rate-distortion cost tables owned by the compressor instance.
 *
 * A single copy lives here in VP8_COMP; MACROBLOCK holds pointers to these
 * tables (see the pointer setup at the end of vp8_create_compressor() and
 * in setup_mbby_copy()) instead of carrying its own copy of each table.
 * All entries are plain 'int' because vp8_cost_tokens() writes 'int' costs.
 */
struct rd_costs_struct
{
/* Motion vector cost rows; MACROBLOCK::mvcost[i] points at
 * &mvcosts[i][mv_max+1]. */
int mvcosts[2][MVvals+1];
/* Motion vector SAD cost rows; MACROBLOCK::mvsadcost[i] points at
 * &mvsadcosts[i][mvfp_max+1]. */
int mvsadcosts[2][MVfpvals+1];
/* Macroblock mode costs: row 0 is filled from vp8_kf_ymode_prob, row 1 from
 * the frame context's ymode_prob; readers index the row by frame type. */
int mbmode_cost[2][MB_MODE_COUNT];
/* Chroma intra mode costs: row 0 is filled from vp8_kf_uv_mode_prob, row 1
 * from the frame context's uv_mode_prob. */
int intra_uv_mode_cost[2][MB_MODE_COUNT];
/* Block mode costs; bmode_costs[i][j] is filled from
 * vp8_kf_bmode_prob[i][j]. */
int bmode_costs[10][10][10];
/* Block mode costs filled from the frame context probabilities
 * (bmode_prob and sub_mv_ref_prob) in vp8_init_mode_costs(). */
int inter_bmode_costs[B_MODE_COUNT];
/* Coefficient token costs.
 * NOTE(review): shape matches the table fill_token_costs() writes; the
 * call site is not visible in this change -- confirm. */
int token_costs[BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
} rd_costs;
} VP8_COMP;
void control_data_rate(VP8_COMP *cpi);
......
......@@ -132,7 +132,7 @@ static int pick_intra4x4block(
MACROBLOCK *x,
int ib,
B_PREDICTION_MODE *best_mode,
unsigned int *mode_costs,
const int *mode_costs,
int *bestrate,
int *bestdistortion)
......@@ -185,7 +185,7 @@ static int pick_intra4x4mby_modes
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error;
int distortion = 0;
unsigned int *bmode_costs;
const int *bmode_costs;
intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
......
......@@ -235,7 +235,7 @@ void vp8_save_coding_context(VP8_COMP *cpi)
cc->frames_since_golden = cpi->common.frames_since_golden;
vp8_copy(cc->mvc, cpi->common.fc.mvc);
vp8_copy(cc->mvcosts, cpi->mb.mvcosts);
vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob);
vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob);
......@@ -272,7 +272,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
vp8_copy(cpi->common.fc.mvc, cc->mvc);
vp8_copy(cpi->mb.mvcosts, cc->mvcosts);
vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts);
vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob);
vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob);
......
......@@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] =
};
static void fill_token_costs(
unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS],
const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]
int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
)
{
int i, j, k;
......@@ -159,21 +159,24 @@ static void fill_token_costs(
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < COEF_BANDS; j++)
for (k = 0; k < PREV_COEF_CONTEXTS; k++)
// check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1
if(k==0 && j>(i==0) )
vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2);
if (k == 0 && j > (i == 0))
vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
else
vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
}
static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Read-only 32-entry lookup table.
 *
 * Only the first five entries are non-zero (4, 4, 3, 2, 1); the remaining
 * 27 entries are zero and are left to C's implicit zero-initialisation of
 * trailing array elements.
 * NOTE(review): the index domain and how the value is applied are not
 * visible in this chunk -- confirm against the users of this table.
 */
static const int rd_iifactor[32] =
{
    4, 4, 3, 2, 1
    /* entries [5]..[31] are implicitly 0 */
};
/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE] =
static const int sad_per_bit16lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
......@@ -192,7 +195,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] =
11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 13, 13, 13, 13, 14, 14
};
static int sad_per_bit4lut[QINDEX_RANGE] =
static const int sad_per_bit4lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
......@@ -637,7 +640,7 @@ static int rd_pick_intra4x4block(
BLOCK *be,
BLOCKD *b,
B_PREDICTION_MODE *best_mode,
unsigned int *bmode_costs,
const int *bmode_costs,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
......@@ -717,7 +720,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
unsigned int *bmode_costs;
const int *bmode_costs;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment