Changes to facilitate multi-threading of encoding stage

Modified the encoding stage to provide row-level entry points, with the
relevant initializations, and to access the token information at row level.

Change-Id: Ife10e55a7c1a420ee906d711caf75002688d9e39
parent 106c620a
......@@ -484,23 +484,31 @@ static void write_modes_sb(VP9_COMP *cpi, MACROBLOCKD *const xd,
}
// Writes the mode information for every 64x64 superblock of one tile,
// walking the tile one superblock row at a time.
//
// NOTE(review): this span is a diff rendered without +/- markers, so the
// pre-change and post-change versions of several lines appear back to back
// (two parameter lists, two `mi_row, mi_col` declarations and two
// write_modes_sb() calls). The comments below describe the post-change
// version, which receives tile_row/tile_col and looks the tokens up itself
// instead of being handed a token cursor by the caller.
static void write_modes(VP9_COMP *cpi, MACROBLOCKD *const xd,
const TileInfo *const tile, vpx_writer *w,
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
unsigned int *const max_mv_magnitude,
const TileInfo *const tile, vpx_writer *w, int tile_row,
int tile_col, unsigned int *const max_mv_magnitude,
int interp_filter_selected[MAX_REF_FRAMES]
[SWITCHABLE]) {
const VP9_COMMON *const cm = &cpi->common;
int mi_row, mi_col;
int mi_row, mi_col, tile_sb_row;
// Token cursor and end marker; both are re-pointed at the start of every
// superblock row inside the loop below.
TOKENEXTRA *tok = NULL;
TOKENEXTRA *tok_end = NULL;
set_partition_probs(cm, xd);
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
// Superblock-row index within the tile, derived from the offset of mi_row
// from the tile's first row.
tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile->mi_row_start) >>
MI_BLOCK_SIZE_LOG2;
// Token range recorded for this row in cpi->tplist: [start, start + count).
tok = cpi->tplist[tile_row][tile_col][tile_sb_row].start;
tok_end = tok + cpi->tplist[tile_row][tile_col][tile_sb_row].count;
vp9_zero(xd->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
write_modes_sb(cpi, xd, tile, w, &tok, tok_end, mi_row, mi_col,
BLOCK_64X64, max_mv_magnitude, interp_filter_selected);
// After the whole row has been written, the cursor is expected to sit
// exactly on the row's recorded stop pointer.
assert(tok == cpi->tplist[tile_row][tile_col][tile_sb_row].stop);
}
}
......@@ -919,9 +927,8 @@ static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) {
MACROBLOCKD *const xd = &data->xd;
vpx_start_encode(&data->bit_writer, data->dest);
write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
&data->bit_writer, &data->tok, data->tok_end,
&data->max_mv_magnitude, data->interp_filter_selected);
assert(data->tok == data->tok_end);
&data->bit_writer, 0, data->tile_idx, &data->max_mv_magnitude,
data->interp_filter_selected);
vpx_stop_encode(&data->bit_writer);
return 1;
}
......@@ -978,8 +985,6 @@ static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
// Populate the worker data.
data->xd = cpi->td.mb.e_mbd;
data->tile_idx = tile_col;
data->tok = cpi->tile_tok[0][tile_col];
data->tok_end = cpi->tile_tok[0][tile_col] + cpi->tok_count[0][tile_col];
data->max_mv_magnitude = cpi->max_mv_magnitude;
memset(data->interp_filter_selected, 0,
sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE);
......@@ -1039,7 +1044,6 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
vpx_writer residual_bc;
int tile_row, tile_col;
TOKENEXTRA *tok_end;
size_t total_size = 0;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
......@@ -1058,10 +1062,6 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
int tile_idx = tile_row * tile_cols + tile_col;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
tok_end = cpi->tile_tok[tile_row][tile_col] +
cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
vpx_start_encode(&residual_bc, data_ptr + total_size + 4);
......@@ -1069,9 +1069,9 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
vpx_start_encode(&residual_bc, data_ptr + total_size);
write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc,
&tok, tok_end, &cpi->max_mv_magnitude,
tile_row, tile_col, &cpi->max_mv_magnitude,
cpi->interp_filter_selected);
assert(tok == tok_end);
vpx_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
// size of this tile
......
......@@ -20,8 +20,6 @@ extern "C" {
typedef struct VP9BitstreamWorkerData {
uint8_t *dest;
int dest_size;
TOKENEXTRA *tok;
TOKENEXTRA *tok_end;
vpx_writer bit_writer;
int tile_idx;
unsigned int max_mv_magnitude;
......
......@@ -4078,7 +4078,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_col, tile_row;
TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
TOKENLIST *tplist = cpi->tplist[0][0];
int tile_tok = 0;
int tplist_count = 0;
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
......@@ -4109,17 +4111,50 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
pre_tok = cpi->tile_tok[tile_row][tile_col];
tile_tok = allocated_tokens(*tile_info);
cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
tplist = cpi->tplist[tile_row][tile_col];
tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
}
}
}
// Encodes one superblock row of a tile and records where its tokens live.
//
// cpi       encoder instance; its tplist entry for this row is updated.
// td        per-thread data handed through to the row encoder.
// tile_row  row index of the tile within the frame's tile grid.
// tile_col  column index of the tile within the frame's tile grid.
// mi_row    first mode-info row of the superblock row to encode.
//
// On return, cpi->tplist[tile_row][tile_col][row] holds the start pointer
// obtained from get_start_tok(), the token cursor position after encoding
// (stop), and their difference (count).
void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row,
                       int tile_col, int mi_row) {
  VP9_COMMON *const cm = &cpi->common;
  TileDataEnc *tile_data =
      &cpi->tile_data[tile_row * (1 << cm->log2_tile_cols) + tile_col];
  const TileInfo *const tile = &tile_data->tile_info;
  // Tile width halved from mi units; only consumed by the assert below.
  int mb_cols = (tile->mi_col_end - tile->mi_col_start + 1) >> 1;
  // Index of this superblock row counted from the top of the tile.
  int sb_row =
      mi_cols_aligned_to_sb(mi_row - tile->mi_row_start) >> MI_BLOCK_SIZE_LOG2;
  TOKENEXTRA *tok = NULL;

  // Position the cursor on the first token slot of this row and remember it.
  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok);
  cpi->tplist[tile_row][tile_col][sb_row].start = tok;

  // The row encoders advance `tok` as they emit tokens.
  if (!cpi->sf.use_nonrd_pick_mode) {
    encode_rd_sb_row(cpi, td, tile_data, mi_row, &tok);
  } else {
    encode_nonrd_sb_row(cpi, td, tile_data, mi_row, &tok);
  }

  cpi->tplist[tile_row][tile_col][sb_row].stop = tok;
  cpi->tplist[tile_row][tile_col][sb_row].count =
      (unsigned int)(tok - cpi->tplist[tile_row][tile_col][sb_row].start);

  // The row must not have produced more tokens than get_token_alloc()
  // reports for a region MI_BLOCK_SIZE >> 1 rows tall and mb_cols wide.
  assert(tok - cpi->tplist[tile_row][tile_col][sb_row].start <=
         get_token_alloc(MI_BLOCK_SIZE >> 1, mb_cols));
  // Keeps mb_cols "used" when assert() compiles away.
  (void)mb_cols;
}
void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
int tile_col) {
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
const TileInfo *const tile_info = &this_tile->tile_info;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
const int mi_row_start = tile_info->mi_row_start;
const int mi_row_end = tile_info->mi_row_end;
int mi_row;
......@@ -4130,16 +4165,8 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
td->mb.m_search_count_ptr = &this_tile->m_search_count;
td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_nonrd_pick_mode)
encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
else
encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
}
cpi->tok_count[tile_row][tile_col] =
(unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
assert(tok - cpi->tile_tok[tile_row][tile_col] <=
allocated_tokens(*tile_info));
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}
static void encode_tiles(VP9_COMP *cpi) {
......
......@@ -495,6 +495,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->tile_tok[0][0]);
cpi->tile_tok[0][0] = 0;
vpx_free(cpi->tplist[0][0]);
cpi->tplist[0][0] = NULL;
vp9_free_pc_tree(&cpi->td);
for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
......@@ -829,6 +832,7 @@ static int alloc_context_buffers_ext(VP9_COMP *cpi) {
static void alloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
int sb_rows;
vp9_alloc_context_buffers(cm, cm->width, cm->height);
......@@ -842,6 +846,12 @@ static void alloc_compressor_data(VP9_COMP *cpi) {
vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
}
sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
vpx_free(cpi->tplist[0][0]);
CHECK_MEM_ERROR(
cm, cpi->tplist[0][0],
vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
vp9_setup_pc_tree(&cpi->common, &cpi->td);
}
......
......@@ -284,6 +284,12 @@ typedef struct RowMTInfo {
#endif
} RowMTInfo;
// Token bookkeeping for one superblock row of a tile; one entry per row is
// filled in by vp9_encode_sb_row().
typedef struct {
// Token cursor position before the row is encoded.
TOKENEXTRA *start;
// Token cursor position after the row has been encoded.
TOKENEXTRA *stop;
// Number of tokens in the row, stored as stop - start.
unsigned int count;
} TOKENLIST;
typedef struct MultiThreadHandle {
int allocated_tile_rows;
int allocated_tile_cols;
......@@ -470,6 +476,7 @@ typedef struct VP9_COMP {
TOKENEXTRA *tile_tok[4][1 << 6];
uint32_t tok_count[4][1 << 6];
TOKENLIST *tplist[4][1 << 6];
// Ambient reconstruction err target for force key frames
int64_t ambient_err;
......@@ -777,6 +784,20 @@ static INLINE int allocated_tokens(TileInfo tile) {
return get_token_alloc(tile_mb_rows, tile_mb_cols);
}
// Computes where the tokens of the row starting at mi_row begin inside the
// token buffer of tile (tile_row, tile_col), and stores that pointer in *tok.
//
// The offset from the tile's token base is get_token_alloc() evaluated for
// the rows that lie above mi_row in the tile (offset halved from mi units)
// and the tile's width (also halved, rounding up).
static INLINE void get_start_tok(VP9_COMP *cpi, int tile_row, int tile_col,
                                 int mi_row, TOKENEXTRA **tok) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_idx = tile_row * (1 << cm->log2_tile_cols) + tile_col;
  const TileInfo *const tile = &cpi->tile_data[tile_idx].tile_info;
  const int mb_cols = (tile->mi_col_end - tile->mi_col_start + 1) >> 1;
  const int mb_rows_above = (mi_row - tile->mi_row_start) >> 1;
  TOKENEXTRA *const tile_base = cpi->tile_tok[tile_row][tile_col];

  *tok = tile_base + get_token_alloc(mb_rows_above, mb_cols);
}
int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
#if CONFIG_VP9_HIGHBITDEPTH
int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment