Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
// Load tile data into tile_buffers
for (n = 0; n < tile_cols; ++n) {
const size_t size =
get_tile(data_end, n == tile_cols - 1, &cm->error, &data);
TileBuffer *const buf = &tile_buffers[n];
buf->data = data;
buf->size = size;
buf->col = n;
data += size;
}
// Sort the buffers based on size in descending order.
qsort(tile_buffers, tile_cols, sizeof(tile_buffers[0]), compare_tile_buffers);
// Rearrange the tile buffers such that per-tile group the largest, and
// presumably the most difficult, tile will be decoded in the main thread.
// This should help minimize the number of instances where the main thread is
// waiting for a worker to complete.
{
int group_start = 0;
while (group_start < tile_cols) {
const TileBuffer largest = tile_buffers[group_start];
const int group_end = MIN(group_start + num_workers, tile_cols) - 1;
memmove(tile_buffers + group_start, tile_buffers + group_start + 1,
(group_end - group_start) * sizeof(tile_buffers[0]));
tile_buffers[group_end] = largest;
group_start = group_end + 1;
}
}
n = 0;
while (n < tile_cols) {
for (i = 0; i < num_workers && n < tile_cols; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
TileInfo *const tile = (TileInfo*)worker->data2;
TileBuffer *const buf = &tile_buffers[n];
tile_data->cm = cm;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
vp9_tile_init(tile, tile_data->cm, 0, buf->col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
setup_tile_context(pbi, &tile_data->xd, 0, buf->col);
setup_tile_macroblockd(tile_data);
if (i == num_workers - 1 || n == tile_cols - 1) {
vp9_worker_execute(worker);
} else {
vp9_worker_launch(worker);
}
if (buf->col == tile_cols - 1) {
final_worker = i;
}
++n;
}
for (; i > 0; --i) {
VP9Worker *const worker = &pbi->tile_workers[i - 1];
pbi->mb.corrupted |= !vp9_worker_sync(worker);
}
if (final_worker > -1) {
TileWorkerData *const tile_data =
(TileWorkerData*)pbi->tile_workers[final_worker].data1;
bit_reader_end = vp9_reader_find_end(&tile_data->bit_reader);
final_worker = -1;
}
static void check_sync_code(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
if (vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_0 ||
vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_1 ||
vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_2) {
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
}
}
static void error_handler(void *data) {
VP9_COMMON *const cm = (VP9_COMMON *)data;
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
}
#define RESERVED \
if (vp9_rb_read_bit(rb)) \
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \
"Reserved bit must be unset")
static size_t read_uncompressed_header(VP9D_COMP *pbi,
struct vp9_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
cm->last_frame_type = cm->frame_type;
if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame marker");
cm->version = vp9_rb_read_bit(rb);
RESERVED;
cm->show_existing_frame = vp9_rb_read_bit(rb);
if (cm->show_existing_frame) {
// Show an existing frame directly.
const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
if (cm->frame_bufs[frame_to_show].ref_count < 1)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Buffer %d does not contain a decoded frame",
frame_to_show);
ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show);
cm->frame_type = (FRAME_TYPE) vp9_rb_read_bit(rb);
cm->show_frame = vp9_rb_read_bit(rb);
cm->error_resilient_mode = vp9_rb_read_bit(rb);
if (cm->frame_type == KEY_FRAME) {
check_sync_code(cm, rb);
cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace
if (cm->color_space != SRGB) {
vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
if (cm->version == 1) {
cm->subsampling_x = vp9_rb_read_bit(rb);
cm->subsampling_y = vp9_rb_read_bit(rb);
vp9_rb_read_bit(rb); // has extra plane
} else {
cm->subsampling_y = cm->subsampling_x = 1;
}
if (cm->version == 1) {
cm->subsampling_y = cm->subsampling_x = 0;
vp9_rb_read_bit(rb); // has extra plane
} else {
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"RGB not supported in profile 0");
}
for (i = 0; i < REFS_PER_FRAME; ++i) {
cm->frame_refs[i].idx = cm->new_fb_idx;
cm->frame_refs[i].buf = get_frame_new_buffer(cm);
}
setup_frame_size(pbi, rb);
cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb);
cm->reset_frame_context = cm->error_resilient_mode ?
0 : vp9_rb_read_literal(rb, 2);
if (cm->intra_only) {
check_sync_code(cm, rb);
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
setup_frame_size(pbi, rb);
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
for (i = 0; i < REFS_PER_FRAME; ++i) {
const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
const int idx = cm->ref_frame_map[ref];
cm->frame_refs[i].idx = idx;
cm->frame_refs[i].buf = &cm->frame_bufs[idx].buf;
cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
}
setup_frame_size_with_refs(pbi, rb);
cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
cm->interp_filter = read_interp_filter(rb);
RefBuffer *const ref_buf = &cm->frame_refs[i];
vp9_setup_scale_factors_for_frame(&ref_buf->sf,
ref_buf->buf->y_crop_width,
ref_buf->buf->y_crop_height,
cm->width, cm->height);
if (vp9_is_scaled(&ref_buf->sf))
vp9_extend_frame_borders(ref_buf->buf);
if (!cm->error_resilient_mode) {
cm->coding_use_prev_mi = 1;
cm->refresh_frame_context = vp9_rb_read_bit(rb);
cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb);
} else {
cm->coding_use_prev_mi = 0;
cm->refresh_frame_context = 0;
cm->frame_parallel_decoding_mode = 1;
}
// This flag will be overridden by the call to vp9_setup_past_independence
// below, forcing the use of context 0 for those frame types.
cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
if (frame_is_intra_only(cm) || cm->error_resilient_mode)
vp9_setup_past_independence(cm);
setup_loopfilter(&cm->lf, rb);
setup_quantization(cm, &pbi->mb, rb);
setup_segmentation(&cm->seg, rb);
setup_tile_info(cm, rb);
if (sz == 0)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Invalid header size");
return sz;
static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
size_t partition_size) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
FRAME_CONTEXT *const fc = &cm->fc;
if (vp9_reader_init(&r, data, partition_size))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r);
if (cm->tx_mode == TX_MODE_SELECT)
read_tx_mode_probs(&fc->tx_probs, &r);
read_coef_probs(fc, cm->tx_mode, &r);
for (k = 0; k < SKIP_CONTEXTS; ++k)
vp9_diff_update_prob(&r, &fc->skip_probs[k]);
if (!frame_is_intra_only(cm)) {
nmv_context *const nmvc = &fc->nmvc;
int i, j;
read_inter_mode_probs(fc, &r);
if (cm->interp_filter == SWITCHABLE)
read_switchable_interp_probs(fc, &r);
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]);
cm->reference_mode = read_frame_reference_mode(cm, &r);
if (cm->reference_mode != SINGLE_REFERENCE)
setup_compound_reference_mode(cm);
read_frame_reference_mode_probs(cm, &r);
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
for (j = 0; j < PARTITION_CONTEXTS; ++j)
for (i = 0; i < PARTITION_TYPES - 1; ++i)
vp9_diff_update_prob(&r, &fc->partition_prob[j][i]);
read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
return vp9_reader_has_error(&r);
}
void vp9_init_dequantizer(VP9_COMMON *cm) {
int q;
for (q = 0; q < QINDEX_RANGE; q++) {
cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q);
cm->y_dequant[q][1] = vp9_ac_quant(q, 0);
cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q);
cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q);
}
}
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
#ifdef NDEBUG
#define debug_check_frame_counts(cm) (void)0
#else // !NDEBUG
// Counts should only be incremented when frame_parallel_decoding_mode and
// error_resilient_mode are disabled.
static void debug_check_frame_counts(const VP9_COMMON *const cm) {
FRAME_COUNTS zero_counts;
vp9_zero(zero_counts);
assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode);
assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode,
sizeof(cm->counts.y_mode)));
assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode,
sizeof(cm->counts.uv_mode)));
assert(!memcmp(cm->counts.partition, zero_counts.partition,
sizeof(cm->counts.partition)));
assert(!memcmp(cm->counts.coef, zero_counts.coef,
sizeof(cm->counts.coef)));
assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch,
sizeof(cm->counts.eob_branch)));
assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp,
sizeof(cm->counts.switchable_interp)));
assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode,
sizeof(cm->counts.inter_mode)));
assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
sizeof(cm->counts.intra_inter)));
assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
sizeof(cm->counts.comp_inter)));
assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref,
sizeof(cm->counts.single_ref)));
assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
sizeof(cm->counts.comp_ref)));
assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
}
#endif // NDEBUG
int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
int i;
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
const uint8_t *data = pbi->source;
const uint8_t *const data_end = pbi->source + pbi->source_sz;
struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler };
const size_t first_partition_size = read_uncompressed_header(pbi, &rb);
const int keyframe = cm->frame_type == KEY_FRAME;
const int tile_rows = 1 << cm->log2_tile_rows;
const int tile_cols = 1 << cm->log2_tile_cols;
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
// showing a frame directly
*p_data_end = data + 1;
return 0;
if (!pbi->decoded_key_frame && !keyframe)
data += vp9_rb_bytes_read(&rb);
if (!read_is_valid(data, first_partition_size, data_end))
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt header length");
pbi->do_loopfilter_inline =
(cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level;
if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData)));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Loop filter thread creation failed");
}
}
alloc_tile_storage(pbi, tile_rows, tile_cols);
xd->mode_info_stride = cm->mode_info_stride;
if (cm->coding_use_prev_mi)
set_prev_mi(cm);
else
cm->prev_mi = NULL;
setup_plane_dequants(cm, xd, cm->base_qindex);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
cm->fc = cm->frame_contexts[cm->frame_context_idx];
vp9_zero(cm->counts);
for (i = 0; i < MAX_MB_PLANE; ++i)
vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
xd->corrupted = 0;
new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
// TODO(jzern): remove frame_parallel_decoding_mode restriction for
// single-frame tile decoding.
if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
cm->frame_parallel_decoding_mode) {
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size);
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size);
}
new_fb->corrupted |= xd->corrupted;
if (keyframe && !new_fb->corrupted)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
vp9_adapt_coef_probs(cm);
if (!frame_is_intra_only(cm)) {
vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
} else {
debug_check_frame_counts(cm);
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = cm->fc;