Newer
Older
const int tile_rows = 1 << cm->log2_tile_rows;
const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
TileBuffer tile_buffers[1][1 << 6];
assert(tile_rows == 1);
(void)tile_rows;
// TODO(jzern): See if we can remove the restriction of passing in max
// threads to the decoder.
if (pbi->num_tile_workers == 0) {
const int num_threads = pbi->max_threads & ~1;
// TODO(jzern): Allocate one less worker, as in the current code we only
// use num_threads - 1 workers.
CHECK_MEM_ERROR(cm, pbi->tile_workers,
vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
for (i = 0; i < num_threads; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
CHECK_MEM_ERROR(cm, worker->data1,
vpx_memalign(32, sizeof(TileWorkerData)));
CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
if (i < num_threads - 1 && !winterface->reset(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Tile decoder thread creation failed");
}
}
}
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
winterface->sync(&pbi->tile_workers[n]);
pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
}
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
vpx_memset(cm->above_context, 0,
sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
vpx_memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_mi_cols);
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
// Sort the buffers based on size in descending order.
qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]),
compare_tile_buffers);
// Rearrange the tile buffers such that per-tile group the largest, and
// presumably the most difficult, tile will be decoded in the main thread.
// This should help minimize the number of instances where the main thread is
// waiting for a worker to complete.
{
int group_start = 0;
while (group_start < tile_cols) {
const TileBuffer largest = tile_buffers[0][group_start];
const int group_end = MIN(group_start + num_workers, tile_cols) - 1;
memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1,
(group_end - group_start) * sizeof(tile_buffers[0][0]));
tile_buffers[0][group_end] = largest;
group_start = group_end + 1;
}
}
n = 0;
while (n < tile_cols) {
for (i = 0; i < num_workers && n < tile_cols; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
TileInfo *const tile = (TileInfo*)worker->data2;
TileBuffer *const buf = &tile_buffers[0][n];
tile_data->cm = cm;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
vp9_tile_init(tile, tile_data->cm, 0, buf->col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
pbi->decrypt_state);
init_macroblockd(cm, &tile_data->xd);
vp9_zero(tile_data->xd.dqcoeff);
if (i == num_workers - 1 || n == tile_cols - 1) {
if (buf->col == tile_cols - 1) {
final_worker = i;
}
++n;
}
for (; i > 0; --i) {
VP9Worker *const worker = &pbi->tile_workers[i - 1];
if (final_worker > -1) {
TileWorkerData *const tile_data =
(TileWorkerData*)pbi->tile_workers[final_worker].data1;
bit_reader_end = vp9_reader_find_end(&tile_data->bit_reader);
final_worker = -1;
}
static void error_handler(void *data) {
VP9_COMMON *const cm = (VP9_COMMON *)data;
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
}
int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) {
return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 &&
vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 &&
vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2;
}
BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) {
int profile = vp9_rb_read_bit(rb);
profile |= vp9_rb_read_bit(rb) << 1;
if (profile > 2)
profile += vp9_rb_read_bit(rb);
return (BITSTREAM_PROFILE) profile;
}
static void read_bitdepth_colorspace_sampling(
VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
if (cm->profile >= PROFILE_2)
cm->bit_depth = vp9_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10;
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
if (cm->color_space != SRGB) {
vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
cm->subsampling_x = vp9_rb_read_bit(rb);
cm->subsampling_y = vp9_rb_read_bit(rb);
if (cm->subsampling_x == 1 && cm->subsampling_y == 1)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"4:2:0 color not supported in profile 1 or 3");
if (vp9_rb_read_bit(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Reserved bit set");
} else {
cm->subsampling_y = cm->subsampling_x = 1;
}
} else {
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
// Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed.
// 4:2:2 or 4:4:0 chroma sampling is not allowed.
cm->subsampling_y = cm->subsampling_x = 0;
if (vp9_rb_read_bit(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Reserved bit set");
} else {
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"4:4:4 color not supported in profile 0 or 2");
}
}
}
static size_t read_uncompressed_header(VP9Decoder *pbi,
struct vp9_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
cm->last_frame_type = cm->frame_type;
if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame marker");
cm->profile = vp9_read_profile(rb);
if (cm->profile >= MAX_PROFILES)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Unsupported bitstream profile");
cm->show_existing_frame = vp9_rb_read_bit(rb);
if (cm->show_existing_frame) {
// Show an existing frame directly.
const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
if (frame_to_show < 0 || cm->frame_bufs[frame_to_show].ref_count < 1)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Buffer %d does not contain a decoded frame",
frame_to_show);
ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show);
cm->frame_type = (FRAME_TYPE) vp9_rb_read_bit(rb);
cm->show_frame = vp9_rb_read_bit(rb);
cm->error_resilient_mode = vp9_rb_read_bit(rb);
if (cm->frame_type == KEY_FRAME) {
if (!vp9_read_sync_code(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
read_bitdepth_colorspace_sampling(cm, rb);
cm->frame_refs[i].idx = -1;
cm->frame_refs[i].buf = NULL;
setup_frame_size(cm, rb);
cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb);
cm->reset_frame_context = cm->error_resilient_mode ?
0 : vp9_rb_read_literal(rb, 2);
if (cm->intra_only) {
if (!vp9_read_sync_code(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
if (cm->profile > PROFILE_0) {
read_bitdepth_colorspace_sampling(cm, rb);
} else {
// NOTE: The intra-only frame header does not include the specification
// of either the color format or color sub-sampling in profile 0. VP9
// specifies that the default color space should be YUV 4:2:0 in this
// case (normative).
cm->color_space = BT_601;
cm->subsampling_y = cm->subsampling_x = 1;
}
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
setup_frame_size(cm, rb);
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
for (i = 0; i < REFS_PER_FRAME; ++i) {
const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
RefBuffer *const ref_frame = &cm->frame_refs[i];
ref_frame->idx = idx;
ref_frame->buf = &cm->frame_bufs[idx].buf;
cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
}
setup_frame_size_with_refs(cm, rb);
cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
cm->interp_filter = read_interp_filter(rb);
RefBuffer *const ref_buf = &cm->frame_refs[i];
vp9_setup_scale_factors_for_frame(&ref_buf->sf,
ref_buf->buf->y_crop_width,
ref_buf->buf->y_crop_height,
cm->width, cm->height);
if (vp9_is_scaled(&ref_buf->sf))
vp9_extend_frame_borders(ref_buf->buf);
if (!cm->error_resilient_mode) {
cm->refresh_frame_context = vp9_rb_read_bit(rb);
cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb);
} else {
cm->refresh_frame_context = 0;
cm->frame_parallel_decoding_mode = 1;
}
// This flag will be overridden by the call to vp9_setup_past_independence
// below, forcing the use of context 0 for those frame types.
cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
if (frame_is_intra_only(cm) || cm->error_resilient_mode)
vp9_setup_past_independence(cm);
setup_loopfilter(&cm->lf, rb);
setup_quantization(cm, &pbi->mb, rb);
setup_segmentation(&cm->seg, rb);
setup_tile_info(cm, rb);
if (sz == 0)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Invalid header size");
return sz;
static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
size_t partition_size) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
FRAME_CONTEXT *const fc = &cm->fc;
if (vp9_reader_init(&r, data, partition_size, pbi->decrypt_cb,
pbi->decrypt_state))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r);
if (cm->tx_mode == TX_MODE_SELECT)
read_tx_mode_probs(&fc->tx_probs, &r);
read_coef_probs(fc, cm->tx_mode, &r);
for (k = 0; k < SKIP_CONTEXTS; ++k)
vp9_diff_update_prob(&r, &fc->skip_probs[k]);
if (!frame_is_intra_only(cm)) {
nmv_context *const nmvc = &fc->nmvc;
int i, j;
read_inter_mode_probs(fc, &r);
if (cm->interp_filter == SWITCHABLE)
read_switchable_interp_probs(fc, &r);
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]);
cm->reference_mode = read_frame_reference_mode(cm, &r);
if (cm->reference_mode != SINGLE_REFERENCE)
setup_compound_reference_mode(cm);
read_frame_reference_mode_probs(cm, &r);
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
for (i = 0; i < INTRA_MODES - 1; ++i)
vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
for (j = 0; j < PARTITION_CONTEXTS; ++j)
for (i = 0; i < PARTITION_TYPES - 1; ++i)
vp9_diff_update_prob(&r, &fc->partition_prob[j][i]);
read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
return vp9_reader_has_error(&r);
}
void vp9_init_dequantizer(VP9_COMMON *cm) {
int q;
for (q = 0; q < QINDEX_RANGE; q++) {
cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q);
cm->y_dequant[q][1] = vp9_ac_quant(q, 0);
cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q);
cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q);
}
}
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
#ifdef NDEBUG
#define debug_check_frame_counts(cm) (void)0
#else // !NDEBUG
// Counts should only be incremented when frame_parallel_decoding_mode and
// error_resilient_mode are disabled.
static void debug_check_frame_counts(const VP9_COMMON *const cm) {
FRAME_COUNTS zero_counts;
vp9_zero(zero_counts);
assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode);
assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode,
sizeof(cm->counts.y_mode)));
assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode,
sizeof(cm->counts.uv_mode)));
assert(!memcmp(cm->counts.partition, zero_counts.partition,
sizeof(cm->counts.partition)));
assert(!memcmp(cm->counts.coef, zero_counts.coef,
sizeof(cm->counts.coef)));
assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch,
sizeof(cm->counts.eob_branch)));
assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp,
sizeof(cm->counts.switchable_interp)));
assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode,
sizeof(cm->counts.inter_mode)));
assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
sizeof(cm->counts.intra_inter)));
assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
sizeof(cm->counts.comp_inter)));
assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref,
sizeof(cm->counts.single_ref)));
assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
sizeof(cm->counts.comp_ref)));
assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
}
#endif // NDEBUG
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
static struct vp9_read_bit_buffer* init_read_bit_buffer(
VP9Decoder *pbi,
struct vp9_read_bit_buffer *rb,
const uint8_t *data,
const uint8_t *data_end,
uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) {
rb->bit_offset = 0;
rb->error_handler = error_handler;
rb->error_handler_data = &pbi->common;
if (pbi->decrypt_cb) {
const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data);
pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n);
rb->bit_buffer = clear_data;
rb->bit_buffer_end = clear_data + n;
} else {
rb->bit_buffer = data;
rb->bit_buffer_end = data_end;
}
return rb;
}
void vp9_decode_frame(VP9Decoder *pbi,
const uint8_t *data, const uint8_t *data_end,
const uint8_t **p_data_end) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0};
uint8_t clear_data[MAX_VP9_HEADER_SIZE];
const size_t first_partition_size = read_uncompressed_header(pbi,
init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
const int tile_rows = 1 << cm->log2_tile_rows;
const int tile_cols = 1 << cm->log2_tile_cols;
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
// showing a frame directly
*p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2);
data += vp9_rb_bytes_read(&rb);
if (!read_is_valid(data, first_partition_size, data_end))
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt header length");
init_macroblockd(cm, &pbi->mb);
if (!cm->error_resilient_mode)
set_prev_mi(cm);
else
cm->prev_mi = NULL;
setup_plane_dequants(cm, xd, cm->base_qindex);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
cm->fc = cm->frame_contexts[cm->frame_context_idx];
vp9_zero(cm->counts);
vp9_zero(xd->dqcoeff);
xd->corrupted = 0;
new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
// TODO(jzern): remove frame_parallel_decoding_mode restriction for
// single-frame tile decoding.
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
if (!xd->corrupted) {
// If multiple threads are used to decode tiles, then we use those threads
// to do parallel loopfiltering.
vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
}
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
new_fb->corrupted |= xd->corrupted;
if (!new_fb->corrupted) {
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
vp9_adapt_coef_probs(cm);
if (!frame_is_intra_only(cm)) {
vp9_adapt_mode_probs(cm);
vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
}
} else {
debug_check_frame_counts(cm);
} else {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data is corrupted.");
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = cm->fc;