Commit 20c28926 authored by Jerome Jiang's avatar Jerome Jiang

vp9: Enable adaptive_rd_threshold for row mt for realtime speed 8.

Change it to a row-based array to avoid the slowdown caused by sync.
row-mt on, speed 8, 2 threads: ~4% speedup for VGA on ARM benefited
from adaptive_rd_threshold.

Change-Id: I887e65a53af20a6c4f48d293daaee09dab3512cf
parent 2fac50fa
......@@ -4285,13 +4285,14 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
int i, j;
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = 32;
tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
tile_data->mode_map[i][j] = j;
}
}
#if CONFIG_MULTITHREAD
tile_data->search_count_mutex = NULL;
tile_data->enc_row_mt_mutex = NULL;
tile_data->row_base_thresh_freq_fact = NULL;
#endif
}
}
......
......@@ -284,6 +284,9 @@ typedef struct TileDataEnc {
int ex_search_count;
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
// Used for adaptive_rd_thresh with row multithreading
int *row_base_thresh_freq_fact;
#if CONFIG_MULTITHREAD
pthread_mutex_t *search_count_mutex;
pthread_mutex_t *enc_row_mt_mutex;
......
......@@ -82,6 +82,16 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
TileDataEnc *this_tile = &cpi->tile_data[tile_col];
vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col);
if (cpi->sf.adaptive_rd_thresh_row_mt) {
const int sb_rows =
(mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
int i;
this_tile->row_base_thresh_freq_fact =
(int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES,
sizeof(*(this_tile->row_base_thresh_freq_fact)));
for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++)
this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT;
}
}
// Assign the sync pointer of tile row zero for every tile row > 0
......@@ -154,10 +164,15 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
TileDataEnc *this_tile =
&cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
tile_col];
if (cpi->sf.adaptive_rd_thresh_row_mt) {
if (this_tile->row_base_thresh_freq_fact != NULL) {
vpx_free(this_tile->row_base_thresh_freq_fact);
this_tile->row_base_thresh_freq_fact = NULL;
}
}
pthread_mutex_destroy(this_tile->search_count_mutex);
vpx_free(this_tile->search_count_mutex);
this_tile->search_count_mutex = NULL;
pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
vpx_free(this_tile->enc_row_mt_mutex);
this_tile->enc_row_mt_mutex = NULL;
......
......@@ -1016,6 +1016,32 @@ static int mode_offset(const PREDICTION_MODE mode) {
}
}
// Row-mt mode-pruning check: returns nonzero when the best RD cost found so
// far is already below the scaled threshold for a mode, i.e. the mode can be
// skipped. A threshold of INT_MAX always prunes.
static INLINE int rd_less_than_thresh_row_mt(int64_t best_rd, int thresh,
                                             const int *const thresh_fact) {
  if (thresh == INT_MAX) return 1;
  // *thresh_fact adaptively scales the base threshold; the >> 5 normalizes
  // by the initial factor value of 32 (RD_THRESH_INIT_FACT).
  return best_rd < ((int64_t)thresh * (*thresh_fact) >> 5);
}
// Row-mt variant of update_thresh_freq_fact(): adapts the mode-pruning
// frequency factor stored in the per-row array
// tile_data->row_base_thresh_freq_fact (indexed from thresh_freq_fact_idx)
// rather than the tile-shared table, so no cross-thread locking is needed.
static INLINE void update_thresh_freq_fact_row_mt(
    VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance,
    int thresh_freq_fact_idx, MV_REFERENCE_FRAME ref_frame,
    THR_MODES best_mode_idx, PREDICTION_MODE mode) {
  const THR_MODES this_mode_idx = mode_idx[ref_frame][mode_offset(mode)];
  int *const fact =
      &tile_data->row_base_thresh_freq_fact[thresh_freq_fact_idx +
                                            this_mode_idx];

  if (this_mode_idx == best_mode_idx) {
    // This mode won: decay its factor so it is pruned less often.
    *fact -= (*fact >> 4);
    return;
  }
  // This mode lost: grow its factor so it is skipped more aggressively.
  if (cpi->sf.limit_newmv_early_exit && mode == NEWMV &&
      ref_frame == LAST_FRAME && source_variance < 5) {
    // Keep NEWMV/LAST viable on low-variance blocks by capping growth at 32.
    *fact = VPXMIN(*fact + RD_THRESH_INC, 32);
  } else {
    *fact = VPXMIN(*fact + RD_THRESH_INC,
                   cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
  }
}
static INLINE void update_thresh_freq_fact(
VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance,
BLOCK_SIZE bsize, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx,
......@@ -1398,7 +1424,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int64_t inter_mode_thresh =
RDCOST(x->rdmult, x->rddiv, intra_cost_penalty, 0);
const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
int thresh_freq_fact_idx = (sb_row * BLOCK_SIZES + bsize) * MAX_MODES;
const int *const rd_thresh_freq_fact =
(cpi->sf.adaptive_rd_thresh_row_mt)
? &(tile_data->row_base_thresh_freq_fact[thresh_freq_fact_idx])
: tile_data->thresh_freq_fact[bsize];
INTERP_FILTER filter_ref;
const int bsl = mi_width_log2_lookup[bsize];
const int pred_filter_search =
......@@ -1687,14 +1719,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
cpi->rc.frames_since_golden > 4)
mode_rd_thresh = mode_rd_thresh << 3;
if (rd_less_than_thresh(
best_rdc.rdcost, mode_rd_thresh,
if ((cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
&rd_thresh_freq_fact[mode_index])) ||
(!cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh(
best_rdc.rdcost, mode_rd_thresh,
#if CONFIG_MULTITHREAD
// Synchronization of this function is only necessary when
// adaptive_rd_thresh is > 0.
cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
// Synchronization of this function
// is only necessary when
// adaptive_rd_thresh is > 0.
cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
#endif
&rd_thresh_freq_fact[mode_index]))
&rd_thresh_freq_fact[mode_index])))
continue;
if (this_mode == NEWMV) {
......@@ -2053,14 +2090,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
continue;
if (rd_less_than_thresh(
best_rdc.rdcost, mode_rd_thresh,
if ((cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
&rd_thresh_freq_fact[mode_index])) ||
(!cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh(
best_rdc.rdcost, mode_rd_thresh,
#if CONFIG_MULTITHREAD
// Synchronization of this function is only necessary when
// adaptive_rd_thresh is > 0.
cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
// Synchronization of this function
// is only necessary when
// adaptive_rd_thresh is > 0.
cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
#endif
&rd_thresh_freq_fact[mode_index]))
&rd_thresh_freq_fact[mode_index])))
continue;
mi->mode = this_mode;
......@@ -2168,16 +2210,27 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// TODO(yunqingwang): Check intra mode mask and only update freq_fact
// for those valid modes.
for (i = 0; i < intra_modes; i++) {
update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
INTRA_FRAME, best_mode_idx, intra_mode_list[i]);
if (cpi->sf.adaptive_rd_thresh_row_mt)
update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance,
thresh_freq_fact_idx, INTRA_FRAME,
best_mode_idx, intra_mode_list[i]);
else
update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
INTRA_FRAME, best_mode_idx,
intra_mode_list[i]);
}
} else {
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
PREDICTION_MODE this_mode;
if (best_ref_frame != ref_frame) continue;
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
ref_frame, best_mode_idx, this_mode);
if (cpi->sf.adaptive_rd_thresh_row_mt)
update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance,
thresh_freq_fact_idx, ref_frame,
best_mode_idx, this_mode);
else
update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
ref_frame, best_mode_idx, this_mode);
}
}
}
......
......@@ -38,6 +38,7 @@ extern "C" {
#define MAX_MODES 30
#define MAX_REFS 6
#define RD_THRESH_INIT_FACT 32
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
......
......@@ -323,6 +323,7 @@ static void set_rt_speed_feature_framesize_independent(
const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
sf->adaptive_rd_thresh_row_mt = 0;
sf->use_fast_coef_costing = 1;
sf->allow_exhaustive_searches = 0;
sf->exhaustive_searches_thresh = INT_MAX;
......@@ -551,6 +552,9 @@ static void set_rt_speed_feature_framesize_independent(
}
}
if (cpi->row_mt && cpi->oxcf.max_threads > 1)
sf->adaptive_rd_thresh_row_mt = 1;
sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
// Only keep INTRA_DC mode for speed 8.
......@@ -578,11 +582,10 @@ static void set_rt_speed_feature_framesize_independent(
sf->limit_newmv_early_exit = 0;
if (cm->width > 640 && cm->height > 480) sf->use_simple_block_yrd = 1;
}
// Turn off adaptive_rd_thresh if row_mt is on for all the non-rd paths. This
// causes too many locks in realtime mode in certain platforms (Android ARM,
// Mac).
if (speed >= 5 && cpi->row_mt && cpi->num_workers > 1) {
// Turn off adaptive_rd_thresh if row_mt is on for speed 5, 6, 7.
if (speed >= 5 && speed < 8 && cpi->row_mt && cpi->num_workers > 1) {
sf->adaptive_rd_thresh = 0;
sf->adaptive_rd_thresh_row_mt = 0;
}
}
......
......@@ -233,6 +233,9 @@ typedef struct SPEED_FEATURES {
// mode to be evaluated. A high value means we will be faster.
int adaptive_rd_thresh;
// Flag to use adaptive_rd_thresh when row-mt is enabled.
int adaptive_rd_thresh_row_mt;
// Enables skipping the reconstruction step (idct, recon) in the
// intermediate steps assuming the last frame didn't have too many intra
// blocks and the q is less than a threshold.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment