Commit 8b0c11c3, authored by Ranjit Kumar Tulabandu, committed by Yunqing Wang

Multi-threading of first pass stats collection

(yunqingwang)
1. Rebased the patch. Incorporated recent first pass changes.
2. Turned on the first pass unit test.

Change-Id: Ia2f7ba8152d0b6dd6bf8efb9dfaf505ba7d8edee
parent 91aa1fae
......@@ -82,9 +82,8 @@ class VPxFirstPassEncoderThreadTest
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
// For now, new_mt_mode only works for 2-pass encoding.
// Enable this once the fp mt patch is checked in.
// if (encoding_mode_ == ::libvpx_test::kTwoPassGood)
// encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_);
if (encoding_mode_ == ::libvpx_test::kTwoPassGood)
encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_);
encoder_initialized_ = true;
}
......@@ -131,7 +130,7 @@ static void compare_fp_stats(vpx_fixed_buf_t *fp_stats) {
for (j = 0; j < kDbl; ++j) {
EXPECT_LE(fabs(*frame_stats1 - *frame_stats2),
fabs(*frame_stats1) / 1000.0);
fabs(*frame_stats1) / 10000.0);
frame_stats1++;
frame_stats2++;
}
......@@ -146,7 +145,7 @@ static void compare_fp_stats(vpx_fixed_buf_t *fp_stats) {
}
TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 50);
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
first_pass_only_ = 1;
cfg_.rc_target_bitrate = 1000;
......
......@@ -131,6 +131,10 @@ struct macroblock {
int use_lp32x32fdct;
int skip_encode;
// In first pass, intra prediction is done based on source pixels
// at tile boundaries
int fp_src_pred;
// use fast quantization process
int quant_fp;
......
......@@ -773,9 +773,10 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
}
}
vp9_predict_intra_block(xd, bwl, tx_size, mode, x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride, dst,
dst_stride, col, row, plane);
vp9_predict_intra_block(
xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
(x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
dst_stride, col, row, plane);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
......
......@@ -50,6 +50,7 @@
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_picklpf.h"
#include "vp9/encoder/vp9_ratectrl.h"
......@@ -1563,6 +1564,13 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
#if CONFIG_VP9_HIGHBITDEPTH
highbd_set_var_fns(cpi);
#endif
// Enable multi-threading for first pass.
cpi->new_mt = 0;
if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
cpi->oxcf.new_mt)
cpi->new_mt = 1;
}
#ifndef M_LOG2_E
......@@ -1719,6 +1727,12 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
}
#endif
#if ENABLE_MT_BIT_MATCH
CHECK_MEM_ERROR(
cm, cpi->twopass.fp_mb_float_stats,
vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1));
#endif
cpi->refresh_alt_ref_frame = 0;
cpi->multi_arf_last_grp_enabled = 0;
......@@ -2076,6 +2090,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
}
vpx_free(cpi->tile_thr_data);
vpx_free(cpi->workers);
vp9_row_mt_mem_dealloc(cpi);
if (cpi->num_workers > 1) {
vp9_loop_filter_dealloc(&cpi->lf_row_sync);
......@@ -2098,6 +2113,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
}
#endif
#if ENABLE_MT_BIT_MATCH
vpx_free(cpi->twopass.fp_mb_float_stats);
cpi->twopass.fp_mb_float_stats = NULL;
#endif
vp9_remove_common(cm);
vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
......@@ -4802,6 +4822,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
for (i = 0; i < MAX_REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
}
cpi->td.mb.fp_src_pred = 0;
if (oxcf->pass == 1 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
const int lossless = is_lossless_requested(oxcf);
#if CONFIG_VP9_HIGHBITDEPTH
......
......@@ -33,7 +33,9 @@
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_job_queue.h"
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_mcomp.h"
......@@ -256,6 +258,8 @@ typedef struct VP9EncoderConfig {
int render_width;
int render_height;
VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
int new_mt;
} VP9EncoderConfig;
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
......@@ -269,8 +273,34 @@ typedef struct TileDataEnc {
int mode_map[BLOCK_SIZES][MAX_MODES];
int m_search_count;
int ex_search_count;
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
} TileDataEnc;
// Job bookkeeping for one tile column in row based multi-threading.
// MultiThreadHandle keeps an array of these, indexed by tile column.
typedef struct RowMTInfo {
// Head of this tile column's pending job list plus the count of jobs that
// have already been handed out.
JobQueueHandle job_queue_hdl;
#if CONFIG_MULTITHREAD
// Held while job_queue_hdl is read or updated.
pthread_mutex_t job_mutex;
#endif
} RowMTInfo;
// Encoder-wide state for row based multi-threading: allocation bookkeeping,
// the shared job queue buffer and the thread-to-tile mapping.
typedef struct MultiThreadHandle {
// Dimensions recorded by vp9_row_mt_mem_alloc() for the buffers it allocated.
int allocated_tile_rows;
int allocated_tile_cols;
int allocated_vert_unit_rows;
// Frame level params
// Number of vertical superblock units in each tile row.
int num_tile_vert_sbs[MAX_NUM_TILE_ROWS];
// Job Queue structure and handles
// One buffer holding the job nodes of all tile columns back to back.
JobQueue *job_queue;
// Jobs queued per tile column by the latest vp9_prepare_job_queue() call.
int jobs_per_tile_col;
// Per-tile-column queue handle (and mutex), indexed by tile column.
RowMTInfo row_mt_info[MAX_NUM_TILE_COLS];
int thread_id_to_tile_id[MAX_NUM_THREADS]; // Mapping of threads to tiles
} MultiThreadHandle;
typedef struct RD_COUNTS {
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
......@@ -629,6 +659,10 @@ typedef struct VP9_COMP {
int keep_level_stats;
Vp9LevelInfo level_info;
MultiThreadHandle multi_thread_ctxt;
void (*row_mt_sync_read_ptr)(VP9RowMTSync *const, int, int);
void (*row_mt_sync_write_ptr)(VP9RowMTSync *const, int, int, const int);
int new_mt;
// Previous Partition Info
BLOCK_SIZE *prev_partition;
......@@ -808,6 +842,18 @@ static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) {
return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL;
}
// Returns how many vertical units of (1 << shift) mi rows are needed to cover
// the rows of |tile|, rounding up.
static INLINE int get_num_vert_units(TileInfo tile, int shift) {
  const int tile_mi_rows = tile.mi_row_end - tile.mi_row_start;
  const int round_up = (1 << shift) - 1;
  return (tile_mi_rows + round_up) >> shift;
}
// Returns how many units of (1 << shift) mi columns are needed to cover the
// columns of |tile|, rounding up.
static INLINE int get_num_cols(TileInfo tile, int shift) {
  const int tile_mi_cols = tile.mi_col_end - tile.mi_col_start;
  const int round_up = (1 << shift) - 1;
  return (tile_mi_cols + round_up) >> shift;
}
static INLINE int get_level_index(VP9_LEVEL level) {
int i;
for (i = 0; i < VP9_LEVELS; ++i) {
......
This diff is collapsed.
......@@ -15,6 +15,10 @@
extern "C" {
#endif
#define MAX_NUM_TILE_COLS (1 << 6)
#define MAX_NUM_TILE_ROWS 4
#define MAX_NUM_THREADS 80
struct VP9_COMP;
struct ThreadData;
......@@ -22,10 +26,41 @@ typedef struct EncWorkerData {
struct VP9_COMP *cpi;
struct ThreadData *td;
int start;
int thread_id;
int tile_completion_status[MAX_NUM_TILE_COLS];
} EncWorkerData;
// Encoder row synchronization
// State passed to the vp9_row_mt_sync_read/write functions. Each TileDataEnc
// carries one instance (row_mt_sync).
typedef struct VP9RowMTSyncData {
#if CONFIG_MULTITHREAD
// NOTE(review): presumably arrays with one mutex / condition variable per
// row, set up by vp9_row_mt_sync_mem_alloc() -- confirm against vp9_ethread.c.
pthread_mutex_t *mutex_;
pthread_cond_t *cond_;
#endif
// Allocate memory to store the sb/mb block index in each row.
// vp9_multi_thread_tile_init() resets the entries in use to -1.
int *cur_col;
int sync_range;
int rows;
} VP9RowMTSync;
void vp9_encode_tiles_mt(struct VP9_COMP *cpi);
void vp9_encode_fp_row_mt(struct VP9_COMP *cpi);
void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c);
void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
const int cols);
void vp9_row_mt_sync_read_dummy(VP9RowMTSync *const row_mt_sync, int r, int c);
void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
const int cols);
// Allocate memory for row based multi-threading synchronization.
void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, struct VP9Common *cm,
int rows);
// Deallocate row based multi-threading synchronization related mutex and data.
void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync);
#ifdef __cplusplus
} // extern "C"
#endif
......
This diff is collapsed.
......@@ -39,6 +39,40 @@ typedef struct {
} FIRSTPASS_MB_STATS;
#endif
#define INVALID_ROW -1
#define ENABLE_MT_BIT_MATCH 0
#if ENABLE_MT_BIT_MATCH
// Floating point first pass statistics kept per macroblock. Only compiled in
// when ENABLE_MT_BIT_MATCH is set; the encoder then allocates one entry per
// macroblock (cm->MBs) in TWO_PASS::fp_mb_float_stats.
typedef struct {
double frame_mb_intra_factor;
double frame_mb_brightness_factor;
double frame_mb_neutral_count;
} FP_MB_FLOAT_STATS;
#endif
// First pass statistics accumulators. Each TileDataEnc carries one instance
// (fp_data), which vp9_multi_thread_tile_init() zeroes before setting
// image_data_start_row to INVALID_ROW.
typedef struct {
double intra_factor;
double brightness_factor;
int64_t coded_error;
int64_t sr_coded_error;
int64_t frame_noise_energy;
int64_t intra_error;
int intercount;
int second_ref_count;
double neutral_count;
int intra_skip_count;
// INVALID_ROW until a start row has been recorded.
int image_data_start_row;
int mvcount;
int sum_mvr;
int sum_mvr_abs;
int sum_mvc;
int sum_mvc_abs;
int64_t sum_mvrs;
int64_t sum_mvcs;
int sum_in_vectors;
int intra_smooth_count;
} FIRSTPASS_DATA;
typedef struct {
double frame;
double weight;
......@@ -114,6 +148,11 @@ typedef struct {
uint8_t *this_frame_mb_stats;
FIRSTPASS_MB_STATS firstpass_mb_stats;
#endif
#if ENABLE_MT_BIT_MATCH
FP_MB_FLOAT_STATS *fp_mb_float_stats;
#endif
// An indication of the content type of the current frame
FRAME_CONTENT_TYPE fr_content_type;
......@@ -141,12 +180,20 @@ typedef struct {
} TWO_PASS;
struct VP9_COMP;
struct ThreadData;
struct TileDataEnc;
void vp9_init_first_pass(struct VP9_COMP *cpi);
void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source);
void vp9_end_first_pass(struct VP9_COMP *cpi);
void vp9_first_pass_encode_tile_mb_row(struct VP9_COMP *cpi,
struct ThreadData *td,
FIRSTPASS_DATA *fp_acc_data,
struct TileDataEnc *tile_data,
MV *best_ref_mv, int mb_row);
void vp9_init_second_pass(struct VP9_COMP *cpi);
void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
......
/*
* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_JOB_QUEUE_H_
#define VP9_ENCODER_VP9_JOB_QUEUE_H_
// Kind of work a job queue is prepared for; passed to vp9_prepare_job_queue().
// ENCODE_JOB queues one job per superblock row, every other type one job per
// macroblock row.
typedef enum {
FIRST_PASS_JOB,
ENCODE_JOB,
ARNR_JOB,
// Number of job types; not a job type itself.
NUM_JOB_TYPES,
} JOB_TYPE;
// Encode job parameters
typedef struct {
int vert_unit_row_num; // Index of the vertical unit row
int tile_col_id; // Tile column index of the tile this job belongs to
int tile_row_id; // Tile row index of the tile this job belongs to
} JobNode;
// Job queue element parameters
// One node of a singly linked list of jobs; the list of a tile column ends
// with a node whose |next| is NULL.
typedef struct {
// Pointer to the next link in the job queue
void *next;
// Job information context of the module
JobNode job_info;
} JobQueue;
// Job queue handle
// Tracks the consumption of one tile column's job list.
typedef struct {
// Pointer to the next job to hand out (a JobQueue node), or NULL once the
// list is exhausted
void *next;
// Counter to store the number of jobs picked up for processing
int num_jobs_acquired;
} JobQueueHandle;
#endif // VP9_ENCODER_VP9_JOB_QUEUE_H_
/*
* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_multi_thread.h"
// Pops the next pending job of tile column |tile_id| off its job queue.
// Returns a pointer to the job's JobNode, or NULL when no job is left.
// When CONFIG_MULTITHREAD is enabled the queue handle is accessed with the
// tile column's job mutex held.
void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
                               int tile_id) {
  RowMTInfo *const tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_id];
  JobQueueHandle *const queue_hdl = &tile_ctxt->job_queue_hdl;
  JobQueue *head;
  JobNode *picked_job = NULL;

#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&tile_ctxt->job_mutex);
#endif

  head = (JobQueue *)queue_hdl->next;
  if (head != NULL) {
    picked_job = &head->job_info;
    // Advance the handle past the job that was just taken.
    queue_hdl->next = head->next;
    queue_hdl->num_jobs_acquired++;
  }

#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&tile_ctxt->job_mutex);
#endif

  return picked_job;
}
// Allocates the resources used by row based multi-threading:
//  - one job queue buffer large enough for every tile column, sized for the
//    larger of the macroblock-row and superblock-row counts so the same
//    buffer serves both job granularities,
//  - one job mutex per tile column (CONFIG_MULTITHREAD only),
//  - row synchronization data for each tile of tile row 0, which the tiles of
//    the remaining tile rows share by copying the sync structure.
// It also records the allocated dimensions and the number of vertical
// superblock units of every tile row in cpi->multi_thread_ctxt.
void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
  struct VP9Common *cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int tile_row, tile_col;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  int jobs_per_tile_col, total_jobs;

  jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows);
  // Calculate the total number of jobs
  total_jobs = jobs_per_tile_col * tile_cols;

  multi_thread_ctxt->allocated_tile_cols = tile_cols;
  multi_thread_ctxt->allocated_tile_rows = tile_rows;
  multi_thread_ctxt->allocated_vert_unit_rows = jobs_per_tile_col;

  // Report an allocation failure through the codec's memory error check
  // instead of leaving job_queue NULL for vp9_prepare_job_queue() to write
  // through.
  CHECK_MEM_ERROR(cm, multi_thread_ctxt->job_queue,
                  (JobQueue *)vpx_memalign(32, total_jobs * sizeof(JobQueue)));

#if CONFIG_MULTITHREAD
  // Create mutex for each tile
  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
    pthread_mutex_init(&row_mt_info->job_mutex, NULL);
  }
#endif

  // Allocate memory for row based multi-threading
  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    TileDataEnc *this_tile = &cpi->tile_data[tile_col];
    vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col);
  }

  // Assign the sync pointer of tile row zero for every tile row > 0
  for (tile_row = 1; tile_row < tile_rows; tile_row++) {
    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileDataEnc *this_col_tile = &cpi->tile_data[tile_col];
      this_tile->row_mt_sync = this_col_tile->row_mt_sync;
    }
  }

  // Calculate the number of vertical units in the given tile row
  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
    TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols];
    TileInfo *tile_info = &this_tile->tile_info;
    multi_thread_ctxt->num_tile_vert_sbs[tile_row] =
        get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
  }
}
// Releases everything vp9_row_mt_mem_alloc() set up: the job queue buffer,
// the per-tile-column job mutexes (CONFIG_MULTITHREAD only) and the row
// synchronization data of the tiles in tile row 0.
// The job queue pointer and the recorded allocation dimensions are cleared
// afterwards, so calling this again without an intervening
// vp9_row_mt_mem_alloc() neither frees the buffer twice nor destroys the
// mutexes twice.
void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int tile_col;

  // Deallocate memory for job queue
  if (multi_thread_ctxt->job_queue) {
    vpx_free(multi_thread_ctxt->job_queue);
    multi_thread_ctxt->job_queue = NULL;
  }

#if CONFIG_MULTITHREAD
  // Destroy mutex for each tile
  for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
       tile_col++) {
    RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
    pthread_mutex_destroy(&row_mt_info->job_mutex);
  }
#endif

  // Free row based multi-threading sync memory
  for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
       tile_col++) {
    TileDataEnc *this_tile = &cpi->tile_data[tile_col];
    vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
  }

  // Nothing is allocated any more.
  multi_thread_ctxt->allocated_tile_cols = 0;
  multi_thread_ctxt->allocated_tile_rows = 0;
  multi_thread_ctxt->allocated_vert_unit_rows = 0;
}
// Resets the per-frame row multi-threading state of every tile in tile row 0:
// the column progress markers of the rows in use are set to -1 and the first
// pass accumulators are cleared. The number of rows reset is the macroblock
// row count in pass 1 and the superblock row count otherwise.
void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int num_tile_cols = 1 << cm->log2_tile_cols;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  const int rows_in_use = (cpi->oxcf.pass == 1) ? cm->mb_rows : sb_rows;
  int tile_col;

  for (tile_col = 0; tile_col < num_tile_cols; ++tile_col) {
    TileDataEnc *const tile = &cpi->tile_data[tile_col];
    VP9RowMTSync *const sync = &tile->row_mt_sync;

    // -1 marks a row in which no column has been processed yet.
    memset(sync->cur_col, -1, sizeof(*sync->cur_col) * rows_in_use);

    vp9_zero(tile->fp_data);
    tile->fp_data.image_data_start_row = INVALID_ROW;
  }
}
// Distributes the first |num_workers| threads over the tile columns in
// round-robin order: thread 0 gets tile 0, thread 1 gets tile 1, ... wrapping
// back to tile 0 after tile |tile_cols| - 1.
void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
                               int tile_cols, int num_workers) {
  int *const tile_of_thread = multi_thread_ctxt->thread_id_to_tile_id;
  int next_tile = 0;
  int worker;

  for (worker = 0; worker < num_workers; ++worker) {
    tile_of_thread[worker] = next_tile;
    ++next_tile;
    if (next_tile == tile_cols) next_tile = 0;
  }
}
// Returns the number of jobs of tile column |cur_tile_id| that have not been
// picked up yet, i.e. jobs_per_tile_col minus the jobs already acquired.
// When CONFIG_MULTITHREAD is enabled the counter is read with the tile
// column's job mutex held.
int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt,
                             int cur_tile_id) {
  RowMTInfo *const tile_ctxt = &multi_thread_ctxt->row_mt_info[cur_tile_id];
  const JobQueueHandle *const queue_hdl = &tile_ctxt->job_queue_hdl;
  int jobs_left;

#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&tile_ctxt->job_mutex);
#endif

  jobs_left = multi_thread_ctxt->jobs_per_tile_col - queue_hdl->num_jobs_acquired;

#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&tile_ctxt->job_mutex);
#endif

  return jobs_left;
}
// Builds the per-tile-column job lists for |job_type| inside the shared job
// queue buffer and resets the per-worker bookkeeping.
// Each tile column owns a contiguous slice of jobs_per_tile_col nodes, linked
// in row order and terminated with NULL. ENCODE_JOB queues one job per
// superblock row and tags each job with the tile row it falls into; every
// other job type queues one job per macroblock row, all tagged tile row 0.
void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  JobQueue *const queue_base = multi_thread_ctxt->job_queue;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  const int jobs_per_tile_col = (job_type != ENCODE_JOB) ? cm->mb_rows : sb_rows;
  const int total_jobs = jobs_per_tile_col * tile_cols;
  int tile_col, worker;

  multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;

  // Start from an all-zero job queue buffer.
  memset(queue_base, 0, total_jobs * sizeof(JobQueue));

  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    RowMTInfo *const tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col];
    // Slice of the buffer that belongs to this tile column.
    JobQueue *const tile_jobs = queue_base + tile_col * jobs_per_tile_col;
    int tile_row = 0;
    int row_in_tile = 0;  // Position of the current row within its tile row.
    int row;

    tile_ctxt->job_queue_hdl.next = (void *)tile_jobs;
    tile_ctxt->job_queue_hdl.num_jobs_acquired = 0;

    for (row = 0; row < jobs_per_tile_col; row++) {
      JobQueue *const node = &tile_jobs[row];

      node->job_info.vert_unit_row_num = row;
      node->job_info.tile_col_id = tile_col;
      node->job_info.tile_row_id = tile_row;
      node->next = (void *)(node + 1);

      // Move on to the next tile row once the last superblock row of the
      // current one has been queued.
      if (ENCODE_JOB == job_type &&
          row_in_tile >= multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) {
        tile_row++;
        row_in_tile = 0;
      } else {
        row_in_tile++;
      }
    }

    // Terminate the list at the last job of this tile column.
    tile_jobs[jobs_per_tile_col - 1].next = (void *)NULL;
  }

  for (worker = 0; worker < cpi->num_workers; worker++) {
    EncWorkerData *const thread_data = &cpi->tile_thr_data[worker];

    thread_data->thread_id = worker;
    for (tile_col = 0; tile_col < tile_cols; tile_col++)
      thread_data->tile_completion_status[tile_col] = 0;
  }
}
// Picks the next tile column for a thread that has run out of jobs in
// |*cur_tile_id|.
// The current tile is marked complete in |tile_completion_status|, as is any
// other tile found to have no jobs left. Among the tiles not yet marked
// complete, the one with the most remaining jobs is chosen.
// Returns 1 when no tile has work left; otherwise stores the chosen tile in
// |*cur_tile_id| and returns 0.
int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
                              int *tile_completion_status, int *cur_tile_id,
                              int tile_cols) {
  int busiest_tile = -1;
  int most_jobs_left = 0;
  int col;

  // The caller's tile is exhausted; exclude it from the scan below.
  tile_completion_status[*cur_tile_id] = 1;

  for (col = 0; col < tile_cols; col++) {
    int jobs_left;

    if (tile_completion_status[col] != 0) continue;

    jobs_left = vp9_get_job_queue_status(multi_thread_ctxt, col);

    // Remember finished tiles so later scans can skip them.
    if (jobs_left == 0) tile_completion_status[col] = 1;

    if (jobs_left > most_jobs_left) {
      most_jobs_left = jobs_left;
      busiest_tile = col;
    }
  }

  if (busiest_tile == -1) return 1;

  // Continue with the least processed tile.
  *cur_tile_id = busiest_tile;
  return 0;
}
/*
* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_MULTI_THREAD_H
#define VP9_ENCODER_VP9_MULTI_THREAD_H
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_job_queue.h"
// Pops the next job of tile column |tile_id|; returns a JobNode pointer or
// NULL when that tile column has no job left.
void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
int tile_id);
// Builds the per-tile-column job lists for |job_type| and resets the
// per-worker thread ids and tile completion flags.
void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type);
// Returns the number of jobs of tile column |cur_tile_id| not yet picked up.
int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt,
int cur_tile_id);
// Maps the first |num_workers| threads onto tile columns in round-robin order.
void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
int tile_cols, int num_workers);
// Resets the row sync progress and first pass accumulators of each tile
// column.
void vp9_multi_thread_tile_init(VP9_COMP *cpi);
// Allocates the job queue, job mutexes and row sync data.
void vp9_row_mt_mem_alloc(VP9_COMP *cpi);
// Releases what vp9_row_mt_mem_alloc() allocated.
void vp9_row_mt_mem_dealloc(VP9_COMP *cpi);
// Marks |*cur_tile_id| complete and selects the tile column with the most
// remaining jobs; returns 1 when no tile column has work left, else 0.
int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
int *tile_completion_status, int *cur_tile_id,
int tile_cols);
#endif // VP9_ENCODER_VP9_MULTI_THREAD_H
......@@ -51,6 +51,7 @@ struct vp9_extracfg {
vpx_color_range_t color_range;
int render_width;
int render_height;
unsigned int new_mt;
};
static struct vp9_extracfg default_extra_cfg = {
......@@ -82,6 +83,7 @@ static struct vp9_extracfg default_extra_cfg = {
0, // color range
0, // render width
0, // render height
1, // new_mt
};
struct vpx_codec_alg_priv {
......@@ -245,6 +247,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
"kf_min_dist not supported in auto mode, use 0 "
"or kf_max_dist instead.");
RANGE_CHECK(extra_cfg, new_mt, 0, 1);
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
......@@ -554,6 +557,8 @@ static vpx_codec_err_t set_encoder_config(
oxcf->target_level = extra_cfg->target_level;
oxcf->new_mt = extra_cfg->new_mt;
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
#if CONFIG_SPATIAL_SVC
oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl];
......@@ -842,6 +847,13 @@ static vpx_codec_err_t ctrl_set_target_level(vpx_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
static vpx_codec_err_t ctrl_set_new_mt(vpx_codec_alg_priv_t *ctx,