Commit bca45646 authored by Yunqing Wang

Make allow_exhaustive_searches feature no longer adaptive

A previous patch turned on the allow_exhaustive_searches feature only for
FC_GRAPHICS_ANIMATION content. This patch further modified the feature
by removing the exhaustive search limit, which made it no longer adaptive.
As a result, the two counters that recorded the number of motion searches
were removed, which helped achieve determinism in row-based
multi-threaded encoding. Tests showed that this patch did not make
the encoder much slower.

Used exhaustive_searches_thresh for this speed feature, and removed
allow_exhaustive_searches. Also, refactored the speed feature code
to follow the general speed feature setting style.

Change-Id: Ib96b182c4c8dfff4c1ab91d2497cc42bb9e5a4aa
parent 30ef50b5
......@@ -93,11 +93,6 @@ struct macroblock {
int rddiv;
int rdmult;
int mb_energy;
int *m_search_count_ptr;
int *ex_search_count_ptr;
#if CONFIG_MULTITHREAD
pthread_mutex_t *search_count_mutex;
#endif
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
......
......@@ -4341,7 +4341,6 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
}
}
#if CONFIG_MULTITHREAD
tile_data->search_count_mutex = NULL;
tile_data->enc_row_mt_mutex = NULL;
tile_data->row_base_thresh_freq_fact = NULL;
#endif
......@@ -4361,10 +4360,6 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
tplist = cpi->tplist[tile_row][tile_col];
tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
// Set up pointers to per thread motion search counters.
this_tile->m_search_count = 0; // Count of motion search hits.
this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
}
}
}
......@@ -4409,13 +4404,6 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
const int mi_row_end = tile_info->mi_row_end;
int mi_row;
// Set up pointers to per thread motion search counters.
td->mb.m_search_count_ptr = &this_tile->m_search_count;
td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
#if CONFIG_MULTITHREAD
td->mb.search_count_mutex = this_tile->search_count_mutex;
#endif
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}
......
......@@ -281,15 +281,12 @@ typedef struct TileDataEnc {
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int mode_map[BLOCK_SIZES][MAX_MODES];
int m_search_count;
int ex_search_count;
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
// Used for adaptive_rd_thresh with row multithreading
int *row_base_thresh_freq_fact;
#if CONFIG_MULTITHREAD
pthread_mutex_t *search_count_mutex;
pthread_mutex_t *enc_row_mt_mutex;
#endif
} TileDataEnc;
......
......@@ -552,7 +552,6 @@ static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
int tile_row, tile_col;
TileDataEnc *this_tile;
int end_of_frame;
int thread_id = thread_data->thread_id;
int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
......@@ -574,13 +573,6 @@ static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
tile_row = proc_job->tile_row_id;
mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;
this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count;
thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
#if CONFIG_MULTITHREAD
thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex;
#endif
vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
}
}
......
......@@ -1998,18 +1998,6 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
int range = sf->mesh_patterns[0].range;
int baseline_interval_divisor;
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
#endif
// Keep track of number of exhaustive calls (this frame in this thread).
++(*x->ex_search_count_ptr);
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex)
pthread_mutex_unlock(x->search_count_mutex);
#endif
// Trap illegal values for interval and range for this function.
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
(interval > range))
......@@ -2367,32 +2355,6 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
return best_sad;
}
#define MIN_EX_SEARCH_LIMIT 128
// Reports whether a follow-on exhaustive search is currently permitted.
//
// The answer is 1 only when all of the following hold, checked in this order:
//   - sf->allow_exhaustive_searches is set,
//   - sf->exhaustive_searches_thresh is below INT_MAX,
//   - the exhaustive-search counter does not exceed the budget, where the
//     budget is max_exaustive_pct percent of the motion-search counter,
//     floored at MIN_EX_SEARCH_LIMIT,
//   - cpi->rc.is_src_frame_alt_ref is zero.
// Otherwise the answer is 0.
//
// When CONFIG_MULTITHREAD is enabled and x->search_count_mutex is non-NULL,
// the counters are read while holding that mutex.
static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  int budget;
  int allowed;

#if CONFIG_MULTITHREAD
  if (x->search_count_mutex != NULL) pthread_mutex_lock(x->search_count_mutex);
#endif

  // Budget scales with the number of motion searches seen so far, but never
  // drops below the fixed minimum.
  budget = VPXMAX(MIN_EX_SEARCH_LIMIT,
                  (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);

  if (!sf->allow_exhaustive_searches) {
    allowed = 0;
  } else if (sf->exhaustive_searches_thresh >= INT_MAX) {
    allowed = 0;
  } else if (*x->ex_search_count_ptr > budget) {
    allowed = 0;
  } else {
    allowed = !cpi->rc.is_src_frame_alt_ref;
  }

#if CONFIG_MULTITHREAD
  if (x->search_count_mutex != NULL)
    pthread_mutex_unlock(x->search_count_mutex);
#endif

  return allowed;
}
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MV *mvp_full, int step_param, int search_method,
int error_per_bit, int *cost_list, const MV *ref_mv,
......@@ -2435,21 +2397,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cost_list, fn_ptr, ref_mv, tmp_mv);
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex)
pthread_mutex_lock(x->search_count_mutex);
#endif
// Keep track of number of searches (this frame in this thread).
++(*x->m_search_count_ptr);
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex)
pthread_mutex_unlock(x->search_count_mutex);
#endif
// Should we allow a follow on exhaustive search?
if (is_exhaustive_allowed(cpi, x)) {
if ((sf->exhaustive_searches_thresh < INT_MAX) &&
!cpi->rc.is_src_frame_alt_ref) {
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
exhuastive_thr >>=
8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
......
......@@ -116,11 +116,6 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
vpx_malloc(sizeof(*this_tile->search_count_mutex)));
pthread_mutex_init(this_tile->search_count_mutex, NULL);
CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
......@@ -170,9 +165,6 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
this_tile->row_base_thresh_freq_fact = NULL;
}
}
pthread_mutex_destroy(this_tile->search_count_mutex);
vpx_free(this_tile->search_count_mutex);
this_tile->search_count_mutex = NULL;
pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
vpx_free(this_tile->enc_row_mt_mutex);
this_tile->enc_row_mt_mutex = NULL;
......
......@@ -20,19 +20,14 @@ static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = {
{ 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
};
#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
// Define 3 mesh density levels to control the number of searches.
#define MESH_DENSITY_LEVELS 3
static MESH_PATTERN
good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = {
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
{ { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
};
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
50, 25, 15, 5, 1, 1
};
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
......@@ -163,6 +158,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
const int boosted = frame_is_boosted(cpi);
int i;
sf->tx_size_search_breakout = 1;
sf->adaptive_rd_thresh = 1;
......@@ -171,6 +167,19 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->use_square_partition_only = !frame_is_boosted(cpi);
sf->use_square_only_threshold = BLOCK_16X16;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
sf->exhaustive_searches_thresh = (1 << 22);
for (i = 0; i < MAX_MESH_STEP; ++i) {
int mesh_density_level = 0;
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[mesh_density_level][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
} else {
sf->exhaustive_searches_thresh = INT_MAX;
}
if (speed >= 1) {
if (cpi->oxcf.pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
......@@ -208,6 +217,10 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 30;
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
: INT_MAX;
}
if (speed >= 2) {
......@@ -229,6 +242,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->allow_partition_search_skip = 1;
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 45;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
int mesh_density_level = 1;
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[mesh_density_level][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
}
if (speed >= 3) {
......@@ -247,6 +270,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
int mesh_density_level = 2;
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[mesh_density_level][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
}
if (speed >= 4) {
......@@ -325,7 +358,6 @@ static void set_rt_speed_feature_framesize_independent(
sf->adaptive_rd_thresh = 1;
sf->adaptive_rd_thresh_row_mt = 0;
sf->use_fast_coef_costing = 1;
sf->allow_exhaustive_searches = 0;
sf->exhaustive_searches_thresh = INT_MAX;
sf->allow_acl = 0;
sf->copy_partition_flag = 0;
......@@ -609,7 +641,6 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
// and multiple threads match
if (cpi->oxcf.row_mt_bit_exact) {
sf->adaptive_rd_thresh = 0;
sf->allow_exhaustive_searches = 0;
sf->adaptive_pred_interp_filter = 0;
}
......@@ -711,6 +742,16 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->adaptive_rd_thresh = 1;
sf->tx_size_search_breakout = 1;
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
: INT_MAX;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
}
}
if (oxcf->mode == REALTIME)
set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed,
oxcf->content);
......@@ -720,32 +761,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
cpi->full_search_sad = vp9_full_search_sad;
cpi->diamond_search_sad = vp9_diamond_search_sad;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
sf->allow_exhaustive_searches = 1;
if (oxcf->mode == BEST) {
sf->exhaustive_searches_thresh = (1 << 20);
sf->max_exaustive_pct = 100;
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
}
} else {
int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
sf->exhaustive_searches_thresh = (1 << 22);
sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
if (speed > 0)
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[speed][i].interval;
}
}
} else {
sf->allow_exhaustive_searches = 0;
}
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
if (oxcf->pass == 1) sf->optimize_coefficients = 0;
......@@ -783,7 +798,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
// and multiple threads match
if (cpi->oxcf.row_mt_bit_exact) {
sf->adaptive_rd_thresh = 0;
sf->allow_exhaustive_searches = 0;
sf->adaptive_pred_interp_filter = 0;
}
......
......@@ -325,15 +325,9 @@ typedef struct SPEED_FEATURES {
// point for this motion search and limits the search range around it.
int adaptive_motion_search;
// Flag for allowing some use of exhaustive searches;
int allow_exhaustive_searches;
// Threshold for allowing exhaustive motion search.
int exhaustive_searches_thresh;
// Maximum number of exhaustive searches for a frame.
int max_exaustive_pct;
// Pattern to be used for any exhaustive mesh searches.
MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment