Commit 71061e93 authored by Ranjit Kumar Tulabandu's avatar Ranjit Kumar Tulabandu Committed by Yunqing Wang

Row based multi-threading of encoding stage

(Yunqing Wang)
This patch implements the row-based multi-threading within tiles in
the encoding pass, and substantially speeds up the multi-threaded
encoder in VP9.

Speed tests at speed 1 on the STDHD set (using 4 tiles) show that the
average speedup of the encoding pass (the second pass in the 2-pass
encoding) is 7% while using 2 threads, 16% while using 4 threads,
85% while using 8 threads, and 116% while using 16 threads.

Change-Id: I12e41dbc171951958af9e6d098efd6e2c82827de
parent eeb288d5
......@@ -925,10 +925,11 @@ int vp9_get_refresh_mask(VP9_COMP *cpi) {
// Worker hook that entropy-codes one tile: initializes the bit writer over
// the tile's destination buffer, writes all mode/coefficient data for the
// tile, and flushes the writer.  Returns 1 to signal success to the worker
// framework.
// NOTE(review): the scraped diff had lost its +/- markers, leaving both the
// old and new argument lists of write_modes() in place; this is the
// reconstructed post-commit call with a single, valid argument list.
static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) {
  MACROBLOCKD *const xd = &data->xd;
  // Bitstream packing workers operate on one tile column each; only a single
  // tile row is handled here.
  const int tile_row = 0;
  vpx_start_encode(&data->bit_writer, data->dest);
  write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
              &data->bit_writer, tile_row, data->tile_idx,
              &data->max_mv_magnitude, data->interp_filter_selected);
  vpx_stop_encode(&data->bit_writer);
  return 1;
}
......
......@@ -11,6 +11,8 @@
#ifndef VP9_ENCODER_VP9_BLOCK_H_
#define VP9_ENCODER_VP9_BLOCK_H_
#include "vpx_util/vpx_thread.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropy.h"
......@@ -88,6 +90,9 @@ struct macroblock {
int mb_energy;
int *m_search_count_ptr;
int *ex_search_count_ptr;
#if CONFIG_MULTITHREAD
pthread_mutex_t *search_count_mutex;
#endif
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
......
......@@ -3095,13 +3095,18 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
const int mi_col_start = tile_info->mi_col_start;
const int mi_col_end = tile_info->mi_col_end;
int mi_col;
const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
const int num_sb_cols =
get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
int sb_col_in_tile;
// Initialize the left context for the new SB row
memset(&xd->left_context, 0, sizeof(xd->left_context));
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
const struct segmentation *const seg = &cm->seg;
int dummy_rate;
int64_t dummy_dist;
......@@ -3112,6 +3117,9 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO **mi = cm->mi_grid_visible + idx_str;
(*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
sb_col_in_tile - 1);
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
......@@ -3163,6 +3171,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rdc, INT64_MAX, td->pc_root);
}
(*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
sb_col_in_tile, num_sb_cols);
}
}
......@@ -4109,13 +4119,17 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
tile_data->mode_map[i][j] = j;
}
}
#if CONFIG_MULTITHREAD
tile_data->search_count_mutex = NULL;
tile_data->enc_row_mt_mutex = NULL;
#endif
}
}
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileInfo *tile_info =
&cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
TileInfo *tile_info = &this_tile->tile_info;
vp9_tile_init(tile_info, cm, tile_row, tile_col);
cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
......@@ -4125,6 +4139,10 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
tplist = cpi->tplist[tile_row][tile_col];
tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
// Set up pointers to per thread motion search counters.
this_tile->m_search_count = 0; // Count of motion search hits.
this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
}
}
}
......@@ -4170,10 +4188,11 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
int mi_row;
// Set up pointers to per thread motion search counters.
this_tile->m_search_count = 0; // Count of motion search hits.
this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
td->mb.m_search_count_ptr = &this_tile->m_search_count;
td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
#if CONFIG_MULTITHREAD
td->mb.search_count_mutex = this_tile->search_count_mutex;
#endif
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
......@@ -4289,11 +4308,20 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
#endif
// If allowed, encoding tiles in parallel with one thread handling one tile.
if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
vp9_encode_tiles_mt(cpi);
else
encode_tiles(cpi);
if (!cpi->new_mt) {
cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
// If allowed, encoding tiles in parallel with one thread handling one
// tile when row based multi-threading is disabled.
if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
vp9_encode_tiles_mt(cpi);
else
encode_tiles(cpi);
} else {
cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
vp9_encode_tiles_row_mt(cpi);
}
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
......
......@@ -39,6 +39,9 @@ void vp9_init_tile_data(struct VP9_COMP *cpi);
void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row,
int tile_col);
void vp9_encode_sb_row(struct VP9_COMP *cpi, struct ThreadData *td,
int tile_row, int tile_col, int mi_row);
void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q);
#ifdef __cplusplus
......
......@@ -1575,17 +1575,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
highbd_set_var_fns(cpi);
#endif
// Enable multi-threading for first pass.
cpi->new_mt = 0;
if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
cpi->oxcf.new_mt && !cpi->use_svc)
cpi->new_mt = 1;
if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
(cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.new_mt &&
!cpi->use_svc)
cpi->new_mt = 1;
vp9_set_new_mt(cpi);
}
#ifndef M_LOG2_E
......@@ -5213,3 +5203,17 @@ void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
vp9_update_entropy(cpi, 0);
}
}
void vp9_set_new_mt(VP9_COMP *cpi) {
  // Enable row based multi-threading for supported modes of encoding.
  // Row-MT is only turned on for non-SVC encodes at speed < 5 when the user
  // requested it (oxcf.new_mt), and only for pass/mode combinations that are
  // supported.
  int enable = 0;
  if (cpi->oxcf.new_mt && !cpi->use_svc && cpi->oxcf.speed < 5) {
    // First pass of a two-pass encode: GOOD or BEST mode.
    if (cpi->oxcf.pass == 1 &&
        (cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST))
      enable = 1;
    // One-pass encode, or the second pass of a two-pass encode: GOOD mode
    // only.
    if ((cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) &&
        cpi->oxcf.mode == GOOD)
      enable = 1;
  }
  cpi->new_mt = enable;
}
......@@ -276,6 +276,10 @@ typedef struct TileDataEnc {
int ex_search_count;
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
#if CONFIG_MULTITHREAD
pthread_mutex_t *search_count_mutex;
pthread_mutex_t *enc_row_mt_mutex;
#endif
} TileDataEnc;
typedef struct RowMTInfo {
......@@ -897,6 +901,8 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_set_new_mt(VP9_COMP *cpi);
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
#ifdef __cplusplus
......
......@@ -341,7 +341,7 @@ void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
#if CONFIG_MULTITHREAD
const int nsync = row_mt_sync->sync_range;
int cur;
// Only signal when there are enough filtered SB for next row to run.
// Only signal when there are enough encoded blocks for next row to run.
int sig = 1;
if (c < cols - 1) {
......@@ -542,3 +542,100 @@ void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
multi_thread_ctxt, num_workers);
}
// Worker hook for row based multi-threaded encoding.  Each worker repeatedly
// pulls the next SB-row job for its current tile from the shared job queue
// and encodes that row; when its tile's queue is empty it polls the other
// tiles' completion status and switches tiles (or exits once every tile is
// done).  Returns 0 on completion.
static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
MultiThreadHandle *multi_thread_ctxt) {
VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
int tile_row, tile_col;
TileDataEnc *this_tile;
int end_of_frame;
int thread_id = thread_data->thread_id;
// Each thread starts on the tile it was assigned by
// vp9_assign_tile_to_thread().
int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
JobNode *proc_job = NULL;
int mi_row;
end_of_frame = 0;
while (0 == end_of_frame) {
// Get the next job in the queue
proc_job =
(JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
if (NULL == proc_job) {
// Query for the status of other tiles
// (may update cur_tile_id to a tile that still has pending jobs;
// returns nonzero only when all tiles are complete).
end_of_frame = vp9_get_tiles_proc_status(
multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
tile_cols);
} else {
// Decode the job descriptor into a (tile, SB row) pair.
tile_col = proc_job->tile_col_id;
tile_row = proc_job->tile_row_id;
mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;
this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
// Point this thread's motion-search counters (and, with pthreads, the
// mutex guarding them) at the per-tile shared counters before encoding.
thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count;
thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
#if CONFIG_MULTITHREAD
thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex;
#endif
vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
}
}
return 0;
}
// Top-level driver for row based multi-threaded tile encoding.  (Re)allocates
// the row-MT context if the tile/row geometry grew, initializes tile data and
// the per-row job queue, seeds each worker's thread data from the main
// thread, launches the workers on enc_row_mt_worker_hook(), and finally
// accumulates the per-thread frame counts and RD stats back into cpi.
void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
int i;
// Reallocate row-MT memory only when the previously allocated geometry is
// too small for the current frame; otherwise just reinitialize tile data.
if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
multi_thread_ctxt->allocated_tile_rows < tile_rows ||
multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
vp9_row_mt_mem_dealloc(cpi);
vp9_init_tile_data(cpi);
vp9_row_mt_mem_alloc(cpi);
} else {
vp9_init_tile_data(cpi);
}
create_enc_workers(cpi, num_workers);
vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
vp9_prepare_job_queue(cpi, ENCODE_JOB);
vp9_multi_thread_tile_init(cpi);
for (i = 0; i < num_workers; i++) {
EncWorkerData *thread_data;
thread_data = &cpi->tile_thr_data[i];
// Before encoding a frame, copy the thread data from cpi.
// (Skip the worker whose td aliases cpi->td — presumably the main
// thread's slot; copying onto itself would be redundant.)
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
thread_data->td->rd_counts = cpi->td.rd_counts;
}
if (thread_data->td->counts != &cpi->common.counts) {
memcpy(thread_data->td->counts, &cpi->common.counts,
sizeof(cpi->common.counts));
}
}
// Blocks until all workers have finished the frame.
launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
multi_thread_ctxt, num_workers);
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
// Accumulate counters.
// NOTE(review): the last worker is skipped — its counts appear to live in
// cm->counts / cpi->td already; confirm against create_enc_workers().
if (i < cpi->num_workers - 1) {
vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}
}
}
......@@ -44,6 +44,8 @@ typedef struct VP9RowMTSyncData {
void vp9_encode_tiles_mt(struct VP9_COMP *cpi);
void vp9_encode_tiles_row_mt(struct VP9_COMP *cpi);
void vp9_encode_fp_row_mt(struct VP9_COMP *cpi);
void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c);
......
......@@ -1993,9 +1993,18 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
int range = sf->mesh_patterns[0].range;
int baseline_interval_divisor;
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
#endif
// Keep track of number of exhaustive calls (this frame in this thread).
++(*x->ex_search_count_ptr);
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex)
pthread_mutex_unlock(x->search_count_mutex);
#endif
// Trap illegal values for interval and range for this function.
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
(interval > range))
......@@ -2356,13 +2365,27 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
#define MIN_EX_SEARCH_LIMIT 128
// Decides whether a follow-on exhaustive (mesh) motion search is permitted
// for the current block.  The decision reads the per-tile motion-search
// counters (*x->m_search_count_ptr, *x->ex_search_count_ptr), which are
// shared between row-MT worker threads, so the whole computation is done
// under search_count_mutex when one is present.
// NOTE(review): the scraped diff had lost its +/- markers, leaving the old
// single-expression body merged with the new locked version; this is the
// reconstructed post-commit body.
static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  int is_exhaustive_allowed;
  int max_ex;
#if CONFIG_MULTITHREAD
  // Guard the shared per-tile search counters in row-MT mode; NULL means
  // single-threaded operation.
  if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
#endif
  // Cap exhaustive searches at max_exaustive_pct percent of the regular
  // searches done so far, with an absolute floor of MIN_EX_SEARCH_LIMIT.
  max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
                  (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
  is_exhaustive_allowed = sf->allow_exhaustive_searches &&
                          (sf->exhaustive_searches_thresh < INT_MAX) &&
                          (*x->ex_search_count_ptr <= max_ex) &&
                          !cpi->rc.is_src_frame_alt_ref;
#if CONFIG_MULTITHREAD
  if (NULL != x->search_count_mutex)
    pthread_mutex_unlock(x->search_count_mutex);
#endif
  return is_exhaustive_allowed;
}
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
......@@ -2407,9 +2430,19 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cost_list, fn_ptr, ref_mv, tmp_mv);
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex)
pthread_mutex_lock(x->search_count_mutex);
#endif
// Keep track of number of searches (this frame in this thread).
++(*x->m_search_count_ptr);
#if CONFIG_MULTITHREAD
if (NULL != x->search_count_mutex)
pthread_mutex_unlock(x->search_count_mutex);
#endif
// Should we allow a follow on exhaustive search?
if (is_exhaustive_allowed(cpi, x)) {
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
......
......@@ -100,11 +100,32 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
multi_thread_ctxt->num_tile_vert_sbs[tile_row] =
get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
}
#if CONFIG_MULTITHREAD
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
vpx_malloc(sizeof(*this_tile->search_count_mutex)));
pthread_mutex_init(this_tile->search_count_mutex, NULL);
CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
pthread_mutex_init(this_tile->enc_row_mt_mutex, NULL);
}
}
#endif
}
void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
int tile_col;
#if CONFIG_MULTITHREAD
int tile_row;
#endif
// Deallocate memory for job queue
if (multi_thread_ctxt->job_queue) vpx_free(multi_thread_ctxt->job_queue);
......@@ -124,6 +145,25 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
TileDataEnc *this_tile = &cpi->tile_data[tile_col];
vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
}
#if CONFIG_MULTITHREAD
for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
tile_row++) {
for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
tile_col++) {
TileDataEnc *this_tile =
&cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
tile_col];
pthread_mutex_destroy(this_tile->search_count_mutex);
vpx_free(this_tile->search_count_mutex);
this_tile->search_count_mutex = NULL;
pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
vpx_free(this_tile->enc_row_mt_mutex);
this_tile->enc_row_mt_mutex = NULL;
}
}
#endif
}
void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
......
......@@ -1657,7 +1657,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
mode_rd_thresh = mode_rd_thresh << 3;
if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
rd_thresh_freq_fact[mode_index]))
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
#endif
&rd_thresh_freq_fact[mode_index]))
continue;
if (this_mode == NEWMV) {
......@@ -2018,7 +2021,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
rd_thresh_freq_fact[mode_index]))
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
#endif
&rd_thresh_freq_fact[mode_index]))
continue;
mi->mode = this_mode;
......
......@@ -610,7 +610,15 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
}
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
int bsize, int best_mode_index) {
int bsize,
#if CONFIG_MULTITHREAD
pthread_mutex_t *enc_row_mt_mutex,
#endif
int best_mode_index) {
#if CONFIG_MULTITHREAD
if (NULL != enc_row_mt_mutex) pthread_mutex_lock(enc_row_mt_mutex);
#endif
if (rd_thresh > 0) {
const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
int mode;
......@@ -628,6 +636,10 @@ void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
}
}
}
#if CONFIG_MULTITHREAD
if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex);
#endif
}
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
......
......@@ -164,11 +164,32 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize,
#if CONFIG_MULTITHREAD
pthread_mutex_t *enc_row_mt_mutex,
#endif
int best_mode_index);
// Returns nonzero when best_rd is below the mode's adaptive RD threshold
// (thresh scaled by *thresh_fact / 32), or when thresh is INT_MAX (i.e. the
// mode is unconditionally skipped).  thresh_fact points into the per-tile
// thresh_freq_fact table, which is updated concurrently by row-MT worker
// threads, so the read is done under enc_row_mt_mutex when one is present.
// NOTE(review): the scraped diff had lost its +/- markers, leaving the old
// return statement merged into the new locked body; this is the
// reconstructed post-commit version.
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
#if CONFIG_MULTITHREAD
                                      pthread_mutex_t *enc_row_mt_mutex,
#endif
                                      const int *const thresh_fact) {
  int is_rd_less_than_thresh;
#if CONFIG_MULTITHREAD
  // Synchronize to ensure data coherency as thresh_freq_fact is maintained at
  // tile level and not thread-safe with row based multi-threading
  if (NULL != enc_row_mt_mutex) pthread_mutex_lock(enc_row_mt_mutex);
#endif
  is_rd_less_than_thresh =
      best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX;
#if CONFIG_MULTITHREAD
  if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex);
#endif
  return is_rd_less_than_thresh;
}
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
......
......@@ -3043,7 +3043,10 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
int64_t mode_threshold[MAX_MODES];
int *mode_map = tile_data->mode_map[bsize];
int *tile_mode_map = tile_data->mode_map[bsize];
int mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid
// lock mechanism involved with reads from
// tile_mode_map
const int mode_search_skip_flags = sf->mode_search_skip_flags;
int64_t mask_filter = 0;
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
......@@ -3155,10 +3158,19 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
#if CONFIG_MULTITHREAD
if (NULL != tile_data->enc_row_mt_mutex)
pthread_mutex_lock(tile_data->enc_row_mt_mutex);
#endif
for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
midx = sf->schedule_mode_search ? mode_skip_start : 0;
memcpy(mode_map, tile_mode_map, sizeof(mode_map));
while (midx > 4) {
uint8_t end_pos = 0;
for (i = 5; i < midx; ++i) {
......@@ -3172,6 +3184,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
midx = end_pos;
}
memcpy(tile_mode_map, mode_map, sizeof(mode_map));
#if CONFIG_MULTITHREAD
if (NULL != tile_data->enc_row_mt_mutex)
pthread_mutex_unlock(tile_data->enc_row_mt_mutex);
#endif
for (midx = 0; midx < MAX_MODES; ++midx) {
int mode_index = mode_map[midx];
int mode_excluded = 0;
......@@ -3573,6 +3592,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
}
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
// If adaptive interp filter is enabled, then the current leaf node of 8x8
// data is needed for sub8x8. Hence preserve the context.
if (cpi->new_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
return;
......@@ -3599,7 +3621,11 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
if (!cpi->rc.is_src_frame_alt_ref)
vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
sf->adaptive_rd_thresh, bsize, best_mode_index);
sf->adaptive_rd_thresh, bsize,
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
#endif
best_mode_index);
// macroblock modes
*mi = best_mbmode;
......@@ -3737,7 +3763,11 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
(cm->interp_filter == mi->interp_filter));
vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
cpi->sf.adaptive_rd_thresh, bsize,
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
#endif
THR_ZEROMV);
vp9_zero(best_pred_diff);
vp9_zero(best_filter_diff);
......@@ -3789,6 +3819,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
int internal_active_edge =
vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi);
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
memset(x->zcoeff_blk[TX_4X4], 0, 4);
......@@ -3880,7 +3911,10 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
if (!internal_active_edge &&
rd_less_than_thresh(best_rd,
rd_opt->threshes[segment_id][bsize][ref_index],
tile_data->thresh_freq_fact[bsize][ref_index]))
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
#endif
&rd_thresh_freq_fact[ref_index]))
continue;
comp_pred = second_ref_frame > INTRA_FRAME;
......@@ -4324,7 +4358,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
!is_inter_block(&best_mbmode));
vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh,
bsize, best_ref_index);
bsize,
#if CONFIG_MULTITHREAD
tile_data->enc_row_mt_mutex,
#endif
best_ref_index);
// macroblock modes
*mi = best_mbmode;
......
......@@ -585,6 +585,15 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
rd->thresh_mult_sub8x8[i] = INT_MAX;
}
}
// With row based multi-threading, the following speed features
// have to be disabled to guarantee that bitstreams encoded with single thread
// and multiple threads match
if (cpi->oxcf.ethread_bit_match) {
sf->adaptive_rd_thresh = 0;
sf->allow_exhaustive_searches = 0;
sf->adaptive_pred_interp_filter = 0;
}
}
void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
......@@ -747,4 +756,13 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
if (!cpi->oxcf.frame_periodic_boost) {
sf->max_delta_qindex = 0;
}
// With row based multi-threading, the following speed features
// have to be disabled to guarantee that bitstreams encoded with single thread
// and multiple threads match
if (cpi->oxcf.ethread_bit_match) {
sf->adaptive_rd_thresh = 0;
sf->allow_exhaustive_searches = 0;
sf->adaptive_pred_interp_filter = 0;
}
}
......@@ -1459,6 +1459,9 @@ static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) {
cfg->ss_number_layers > 1 && cfg->ts_number_layers > 1) {
return VPX_CODEC_INVALID_PARAM;
}
vp9_set_new_mt(ctx->cpi);
return VPX_CODEC_OK;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment