vp9,realtime: Enable row multithreading for non-rd

Enable row-level multithreading for realtime encodes where the non-rd
path is used (speed >= 5).

Change-Id: I5439cb49a02171166d8e1de06c7d5e6f8e819a41
parent 07c48ccf
......@@ -274,16 +274,15 @@ class VPxEncoderThreadTest
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
// While row_mt = 1/0(with/without row-based multi-threading), several
// speed features that would adaptively adjust encoding parameters have
// to be disabled to guarantee the bit match of the resulted bitstream.
encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_);
} else {
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
encoder->Control(VP9E_SET_AQ_MODE, 3);
}
encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
// While row_mt = 1, several speed features that would adaptively adjust
// encoding parameters have to be disabled to guarantee the bit exactness
// of the resulting bitstream.
encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_);
encoder_initialized_ = true;
}
}
......
......@@ -3907,13 +3907,18 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
const int mi_col_start = tile_info->mi_col_start;
const int mi_col_end = tile_info->mi_col_end;
int mi_col;
const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
const int num_sb_cols =
get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
int sb_col_in_tile;
// Initialize the left context for the new SB row
memset(&xd->left_context, 0, sizeof(xd->left_context));
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
const struct segmentation *const seg = &cm->seg;
RD_COST dummy_rdc;
const int idx_str = cm->mi_stride * mi_row + mi_col;
......@@ -3921,6 +3926,10 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
BLOCK_SIZE bsize = BLOCK_64X64;
int seg_skip = 0;
(*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
sb_col_in_tile - 1);
x->source_variance = UINT_MAX;
vp9_zero(x->pred_mv);
vp9_rd_cost_init(&dummy_rdc);
......@@ -3996,6 +4005,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
break;
default: assert(0); break;
}
(*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
sb_col_in_tile, num_sb_cols);
}
}
// end RTC play code
......
......@@ -5235,4 +5235,11 @@ void vp9_set_row_mt(VP9_COMP *cpi) {
(cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
!cpi->use_svc)
cpi->row_mt = 1;
// In realtime mode, enable row-based multi-threading for all the speed levels
// where the non-rd path is used.
if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt &&
!cpi->use_svc) {
cpi->row_mt = 1;
}
}
......@@ -625,6 +625,23 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
memcpy(thread_data->td->counts, &cpi->common.counts,
sizeof(cpi->common.counts));
}
// Handle use_nonrd_pick_mode case.
if (cpi->sf.use_nonrd_pick_mode) {
MACROBLOCK *const x = &thread_data->td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
int j;
for (j = 0; j < MAX_MB_PLANE; ++j) {
p[j].coeff = ctx->coeff_pbuf[j][0];
p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
p[j].eobs = ctx->eobs_pbuf[j][0];
}
}
}
launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
......
......@@ -1666,11 +1666,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
cpi->rc.frames_since_golden > 4)
mode_rd_thresh = mode_rd_thresh << 3;
if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
if (rd_less_than_thresh(
best_rdc.rdcost, mode_rd_thresh,
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
// Synchronization of this function is only necessary when
// adaptive_rd_thresh is > 0.
cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
#endif
&rd_thresh_freq_fact[mode_index]))
&rd_thresh_freq_fact[mode_index]))
continue;
if (this_mode == NEWMV) {
......@@ -2030,11 +2033,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
continue;
if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
if (rd_less_than_thresh(
best_rdc.rdcost, mode_rd_thresh,
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
// Synchronization of this function is only necessary when
// adaptive_rd_thresh is > 0.
cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
#endif
&rd_thresh_freq_fact[mode_index]))
&rd_thresh_freq_fact[mode_index]))
continue;
mi->mode = this_mode;
......
......@@ -558,6 +558,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed,
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 0;
}
// Turn off adaptive_rd_thresh if row_mt is on for all the non-rd paths.
// Leaving it enabled causes too many locks in realtime mode on certain
// platforms (Android ARM, Mac).
if (speed >= 5 && cpi->row_mt && cpi->num_workers > 1) {
sf->adaptive_rd_thresh = 0;
}
}
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment