/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/common/vp9_header.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_setupintrarecon.h"
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/common/vp9_extend.h"
#include "vp9/common/vp9_modecont.h"
#include "vp9/decoder/vp9_dboolhuff.h"
#include "vp9/common/vp9_tile_common.h"
#define COEFCOUNT_TESTING
#ifdef DEC_DEBUG
int dec_debug = 0;
#endif
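
// Little-endian readers for fixed-size values in the uncompressed
// frame header.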
static int read_le16(const uint8_t *p) {
return (p[1] << 8) | p[0];
}
static int read_le32(const uint8_t *p) {
return (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
}
// len == 0 is not allowed
static int read_is_valid(const uint8_t *start, size_t len,
const uint8_t *end) {
return start + len > start && start + len <= end;
}
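
// Reads the frame-level transform mode: a 2-bit literal, plus one extra
// bit to distinguish ALLOW_32X32 from TX_MODE_SELECT.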
static TXFM_MODE read_txfm_mode(vp9_reader *r) {
TXFM_MODE mode = vp9_read_literal(r, 2);
if (mode == ALLOW_32X32)
mode += vp9_read_bit(r);
return mode;
}
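
// merge_index() and inv_remap_prob() invert the encoder-side probability
// remapping: a decoded subexponential delta is mapped back to an updated
// 8-bit probability relative to the old value.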
static int merge_index(int v, int n, int modulus) {
int max1 = (n - 1 - modulus / 2) / modulus + 1;
if (v < max1) v = v * modulus + modulus / 2;
else {
int w;
v -= max1;
w = v;
v += (v + modulus - modulus / 2) / modulus;
while (v % modulus == modulus / 2 ||
w != v - (v + modulus - modulus / 2) / modulus) v++;
}
  return v;
}
static int inv_remap_prob(int v, int m) {
  const int n = 256;
  const int modulus = MODULUS_PARAM;
  v = merge_index(v, n - 1, modulus);
  if ((m << 1) <= n)
    return vp9_inv_recenter_nonneg(v + 1, m);
  else
    return n - 1 - vp9_inv_recenter_nonneg(v + 1, n - 1 - m);
}
static vp9_prob read_prob_diff_update(vp9_reader *const bc, int oldp) {
  int delp = vp9_decode_term_subexp(bc, SUBEXP_PARAM, 255);
  return (vp9_prob)inv_remap_prob(delp, oldp);
}
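
// Builds the per-qindex dequantization tables for the luma and chroma
// planes across the full QINDEX_RANGE of quantizer levels.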
void vp9_init_de_quantizer(VP9D_COMP *pbi) {
  int i, q;
  VP9_COMMON *const pc = &pbi->common;

  for (q = 0; q < QINDEX_RANGE; q++) {
    pc->y_dequant[q][0] = (int16_t)vp9_dc_quant(q, pc->y1dc_delta_q);
    pc->uv_dequant[q][0] = (int16_t)vp9_dc_uv_quant(q, pc->uvdc_delta_q);

    /* all the ac values */
    for (i = 1; i < 16; i++) {
      const int rc = vp9_default_zig_zag1d_4x4[i];

      pc->y_dequant[q][rc] = (int16_t)vp9_ac_yquant(q);
      pc->uv_dequant[q][rc] = (int16_t)vp9_ac_uv_quant(q, pc->uvac_delta_q);
    }
  }
}
static int get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex) {
// Set the Q baseline allowing for any segment level adjustment
if (vp9_segfeature_active(mb, segment_id, SEG_LVL_ALT_Q)) {
const int data = vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q);
return mb->mb_segment_abs_delta == SEGMENT_ABSDATA ?
data : // Abs value
clamp(base_qindex + data, 0, MAXQ); // Delta value
} else {
return base_qindex;
}
}
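
// Sets up the per-macroblock dequantizers and picks the inverse transform
// implementations: WHT-based for lossless (qindex 0), DCT-based otherwise.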
static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *mb) {
  int i;
  VP9_COMMON *const pc = &pbi->common;
  const int segment_id = mb->mode_info_context->mbmi.segment_id;
  const int qindex = get_qindex(mb, segment_id, pc->base_qindex);
  mb->q_index = qindex;

  for (i = 0; i < 16; i++)
    mb->block[i].dequant = pc->y_dequant[qindex];

  for (i = 16; i < 24; i++)
    mb->block[i].dequant = pc->uv_dequant[qindex];

  if (mb->lossless) {
    assert(qindex == 0);
    mb->inv_txm4x4_1 = vp9_short_iwalsh4x4_1;
    mb->inv_txm4x4 = vp9_short_iwalsh4x4;
    mb->itxm_add = vp9_dequant_idct_add_lossless_c;
    mb->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
    mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
  } else {
    mb->inv_txm4x4_1 = vp9_short_idct4x4_1;
    mb->inv_txm4x4 = vp9_short_idct4x4;
    mb->itxm_add = vp9_dequant_idct_add;
    mb->itxm_add_y_block = vp9_dequant_idct_add_y_block;
    mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block;
  }
}
#if CONFIG_CODE_NONZEROCOUNT
static void propagate_nzcs(VP9_COMMON *cm, MACROBLOCKD *xd) {
MODE_INFO *m = xd->mode_info_context;
BLOCK_SIZE_TYPE sb_type = m->mbmi.sb_type;
const int mis = cm->mode_info_stride;
int n;
if (sb_type == BLOCK_SIZE_SB64X64) {
for (n = 0; n < 16; ++n) {
int i = n >> 2;
int j = n & 3;
if (i == 0 && j == 0) continue;
vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
384 * sizeof(m->mbmi.nzcs[0]));
}
} else if (sb_type == BLOCK_SIZE_SB32X32) {
for (n = 0; n < 4; ++n) {
int i = n >> 1;
int j = n & 1;
if (i == 0 && j == 0) continue;
vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
384 * sizeof(m->mbmi.nzcs[0]));
}
}
}
#endif
/* skip_recon_mb() is modified: instead of writing the result to the
 * predictor buffer and then copying it to the dst buffer, we write the
 * result directly to the dst buffer. This eliminates an unnecessary copy.
 */
static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
                          int mb_row, int mb_col) {
  MODE_INFO *m = xd->mode_info_context;
  BLOCK_SIZE_TYPE sb_type = m->mbmi.sb_type;

  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
    if (sb_type == BLOCK_SIZE_SB64X64) {
      vp9_build_intra_predictors_sb64uv_s(xd);
      vp9_build_intra_predictors_sb64y_s(xd);
    } else if (sb_type == BLOCK_SIZE_SB32X32) {
      vp9_build_intra_predictors_sbuv_s(xd);
      vp9_build_intra_predictors_sby_s(xd);
    } else {
      vp9_build_intra_predictors_mbuv_s(xd);
      vp9_build_intra_predictors_mby_s(xd);
    }
  } else {
    if (sb_type == BLOCK_SIZE_SB64X64) {
      vp9_build_inter64x64_predictors_sb(xd, mb_row, mb_col);
    } else if (sb_type == BLOCK_SIZE_SB32X32) {
      vp9_build_inter32x32_predictors_sb(xd, mb_row, mb_col);
    } else {
      vp9_build_inter16x16_predictors_mb(xd,
                                         xd->dst.y_buffer,
                                         xd->dst.u_buffer,
                                         xd->dst.v_buffer,
                                         xd->dst.y_stride,
                                         xd->dst.uv_stride,
                                         mb_row, mb_col);
    }
  }
#if CONFIG_CODE_NONZEROCOUNT
  vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
  propagate_nzcs(&pbi->common, xd);
#endif
}
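
// Reconstructs a 16x16-transform macroblock: one 16x16 luma transform
// (type-dependent) plus one 8x8 transform per chroma plane.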
static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
                         BOOL_DECODER* const bc) {
  const TX_TYPE tx_type = get_tx_type_16x16(xd, 0);

  if (tx_type != DCT_DCT) {
    vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff,
                                xd->block[0].dequant, xd->dst.y_buffer,
                                xd->dst.y_buffer, xd->dst.y_stride,
                                xd->dst.y_stride, xd->plane[0].eobs[0]);
  } else {
    vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant,
                               xd->dst.y_buffer, xd->dst.y_buffer,
                               xd->dst.y_stride, xd->dst.y_stride,
                               xd->plane[0].eobs[0]);
  }

  vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
                           xd->dst.u_buffer, xd->dst.u_buffer,
                           xd->dst.uv_stride, xd->dst.uv_stride,
                           xd->plane[1].eobs[0]);

  vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[20].dequant,
                           xd->dst.v_buffer, xd->dst.v_buffer,
                           xd->dst.uv_stride, xd->dst.uv_stride,
                           xd->plane[2].eobs[0]);
}
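
// Reconstructs an 8x8-transform macroblock. I8X8_PRED interleaves
// per-subblock intra prediction with the inverse transforms; other modes
// use the block-level helpers.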
static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
                       BOOL_DECODER* const bc) {
  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
  // luma
  // if the first one is DCT_DCT assume all the rest are as well
  TX_TYPE tx_type = get_tx_type_8x8(xd, 0);
  if (tx_type != DCT_DCT || mode == I8X8_PRED) {
    int i;
    for (i = 0; i < 4; i++) {
      int ib = vp9_i8x8_block[i];
      int idx = (ib & 0x02) ? (ib + 2) : ib;
      int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16);
      int16_t *dq = xd->block[0].dequant;
      uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
      int stride = xd->dst.y_stride;
      if (mode == I8X8_PRED) {
        BLOCKD *b = &xd->block[ib];
        int i8x8mode = b->bmi.as_mode.first;
        vp9_intra8x8_predict(xd, b, i8x8mode, dst, stride);
      }
      tx_type = get_tx_type_8x8(xd, ib);
      if (tx_type != DCT_DCT) {
        vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, dst, stride, stride,
                                  xd->plane[0].eobs[idx]);
      } else {
        vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride,
                                   xd->plane[0].eobs[idx]);
      }
    }
  } else {
    vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff,
                                     xd->block[0].dequant,
                                     xd->dst.y_buffer,
                                     xd->dst.y_stride,
                                     xd->dst.y_buffer,
                                     xd->dst.y_stride,
                                     xd->plane[0].eobs, xd);
  }

  // chroma
  if (mode == I8X8_PRED) {
    int i;
    for (i = 0; i < 4; i++) {
      int ib = vp9_i8x8_block[i];
      BLOCKD *b = &xd->block[ib];
      int i8x8mode = b->bmi.as_mode.first;

      b = &xd->block[16 + i];
      vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
                              b->dst_stride);
      xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
                   b->dequant, *(b->base_dst) + b->dst,
                   *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
                   xd->plane[1].eobs[i]);

      b = &xd->block[20 + i];
      vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
                              b->dst_stride);
      xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
                   b->dequant, *(b->base_dst) + b->dst,
                   *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
                   xd->plane[2].eobs[i]);
    }
  } else if (mode == SPLITMV) {
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
                          xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
                          xd->dst.uv_stride, xd->plane[1].eobs);

    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
                          xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
                          xd->dst.uv_stride, xd->plane[2].eobs);
  } else {
    vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
                             xd->dst.u_buffer, xd->dst.u_buffer,
                             xd->dst.uv_stride, xd->dst.uv_stride,
                             xd->plane[1].eobs[0]);

    vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
                             xd->dst.v_buffer, xd->dst.v_buffer,
                             xd->dst.uv_stride, xd->dst.uv_stride,
                             xd->plane[2].eobs[0]);
  }
}
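
// Reconstructs a 4x4-transform macroblock; handles the I8X8_PRED, B_PRED
// and SPLITMV special cases, each of which couples prediction with the
// inverse transform at subblock granularity.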
static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                       BOOL_DECODER* const bc) {
  TX_TYPE tx_type;
  int i = 0;
  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
  if (mode == I8X8_PRED) {
    for (i = 0; i < 4; i++) {
      int ib = vp9_i8x8_block[i];
      const int iblock[4] = {0, 1, 4, 5};
      int j;
      BLOCKD *b = &xd->block[ib];
      int i8x8mode = b->bmi.as_mode.first;
      vp9_intra8x8_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
                           b->dst_stride);
      for (j = 0; j < 4; j++) {
        b = &xd->block[ib + iblock[j]];
        tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
        if (tx_type != DCT_DCT) {
          vp9_dequant_iht_add_c(tx_type,
                                BLOCK_OFFSET(xd->plane[0].qcoeff,
                                             ib + iblock[j], 16),
                                b->dequant, *(b->base_dst) + b->dst,
                                *(b->base_dst) + b->dst, b->dst_stride,
                                b->dst_stride,
                                xd->plane[0].eobs[ib + iblock[j]]);
        } else {
          xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
                       b->dequant, *(b->base_dst) + b->dst,
                       *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
                       xd->plane[0].eobs[ib + iblock[j]]);
        }
      }
      b = &xd->block[16 + i];
      vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
                              b->dst_stride);
      xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
                   b->dequant, *(b->base_dst) + b->dst,
                   *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
                   xd->plane[1].eobs[i]);
      b = &xd->block[20 + i];
      vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
                              b->dst_stride);
      xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
                   b->dequant, *(b->base_dst) + b->dst,
                   *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
                   xd->plane[2].eobs[i]);
    }
  } else if (mode == B_PRED) {
    for (i = 0; i < 16; i++) {
      BLOCKD *b = &xd->block[i];
      int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
#if CONFIG_NEWBINTRAMODES
      xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context =
          vp9_find_bpred_context(xd, b);
      if (!xd->mode_info_context->mbmi.mb_skip_coeff)
        vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i);
#endif
      vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst,
                           b->dst_stride);
      tx_type = get_tx_type_4x4(xd, i);
      if (tx_type != DCT_DCT) {
        vp9_dequant_iht_add_c(tx_type,
                              BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
                              b->dequant, *(b->base_dst) + b->dst,
                              *(b->base_dst) + b->dst, b->dst_stride,
                              b->dst_stride, xd->plane[0].eobs[i]);
      } else {
        xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
                     b->dequant, *(b->base_dst) + b->dst,
                     *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
                     xd->plane[0].eobs[i]);
      }
    }
#if CONFIG_NEWBINTRAMODES
    if (!xd->mode_info_context->mbmi.mb_skip_coeff)
      vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc);
#endif
    vp9_build_intra_predictors_mbuv_s(xd);
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
                          xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
                          xd->dst.uv_stride, xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
                          xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
                          xd->dst.uv_stride, xd->plane[2].eobs);
  } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
    xd->itxm_add_y_block(xd->plane[0].qcoeff,
                         xd->block[0].dequant,
                         xd->dst.y_buffer, xd->dst.y_stride,
                         xd->dst.y_buffer,
                         xd->dst.y_stride,
                         xd->plane[0].eobs, xd);
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
                          xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
                          xd->dst.uv_stride, xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
                          xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
                          xd->dst.uv_stride, xd->plane[2].eobs);
  } else {
    for (i = 0; i < 16; i++) {
      BLOCKD *b = &xd->block[i];
      tx_type = get_tx_type_4x4(xd, i);
      if (tx_type != DCT_DCT) {
        vp9_dequant_iht_add_c(tx_type,
                              BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
                              b->dequant, *(b->base_dst) + b->dst,
                              *(b->base_dst) + b->dst, b->dst_stride,
                              b->dst_stride, xd->plane[0].eobs[i]);
      } else {
        xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
                     b->dequant, *(b->base_dst) + b->dst,
                     *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride,
                     xd->plane[0].eobs[i]);
      }
    }
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
                          xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer,
                          xd->dst.uv_stride, xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
                          xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer,
                          xd->dst.uv_stride, xd->plane[2].eobs);
  }
}
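
// Superblock inverse transforms, one helper per transform size and plane
// type; each walks the transform blocks of the SB in raster order.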
static INLINE void decode_sby_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl;
const int bhl = mb_height_log2(bsize) - 1, bh = 1 << bhl;
const int y_count = bw * bh;
int n;
for (n = 0; n < y_count; n++) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> bwl;
const int y_offset = (y_idx * 32) * mb->dst.y_stride + (x_idx * 32);
vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 1024),
                               mb->block[0].dequant,
mb->dst.y_buffer + y_offset,
mb->dst.y_buffer + y_offset,
mb->dst.y_stride, mb->dst.y_stride,
mb->plane[0].eobs[n * 64]);
}
}
static INLINE void decode_sbuv_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) - 1, bw = (1 << bwl) / 2;
const int bhl = mb_height_log2(bsize) - 1, bh = (1 << bhl) / 2;
const int uv_count = bw * bh;
int n;
for (n = 0; n < uv_count; n++) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> (bwl - 1);
const int uv_offset = (y_idx * 32) * mb->dst.uv_stride + (x_idx * 32);
vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 1024),
mb->block[16].dequant,
mb->dst.u_buffer + uv_offset,
mb->dst.u_buffer + uv_offset,
mb->dst.uv_stride, mb->dst.uv_stride,
mb->plane[1].eobs[n * 64]);
vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 1024),
mb->block[20].dequant,
mb->dst.v_buffer + uv_offset,
mb->dst.v_buffer + uv_offset,
mb->dst.uv_stride, mb->dst.uv_stride,
mb->plane[2].eobs[n * 64]);
}
}
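
// 16x16 inverse transforms for the luma plane of a superblock; the
// transform type can vary per block, unlike the DCT-only chroma path below.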
static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
const int bhl = mb_height_log2(bsize), bh = 1 << bhl;
const int y_count = bw * bh;
int n;
for (n = 0; n < y_count; n++) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> bwl;
const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16);
    const TX_TYPE tx_type = get_tx_type_16x16(mb,
                                              (y_idx * (4 * bw) + x_idx) * 4);
    if (tx_type == DCT_DCT) {
      vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
                                 mb->block[0].dequant,
                                 mb->dst.y_buffer + y_offset,
                                 mb->dst.y_buffer + y_offset,
                                 mb->dst.y_stride, mb->dst.y_stride,
                                 mb->plane[0].eobs[n * 16]);
    } else {
      vp9_dequant_iht_add_16x16_c(tx_type,
                                  BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
                                  mb->block[0].dequant,
                                  mb->dst.y_buffer + y_offset,
                                  mb->dst.y_buffer + y_offset,
                                  mb->dst.y_stride, mb->dst.y_stride,
                                  mb->plane[0].eobs[n * 16]);
    }
  }
}
static INLINE void decode_sbuv_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize), bw = (1 << bwl) / 2;
const int bhl = mb_height_log2(bsize), bh = (1 << bhl) / 2;
const int uv_count = bw * bh;
int n;
  assert(bsize >= BLOCK_SIZE_SB32X32);

  for (n = 0; n < uv_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> (bwl - 1);
    const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
    vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256),
                               mb->block[16].dequant,
                               mb->dst.u_buffer + uv_offset,
                               mb->dst.u_buffer + uv_offset,
                               mb->dst.uv_stride, mb->dst.uv_stride,
                               mb->plane[1].eobs[n * 16]);
    vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256),
                               mb->block[20].dequant,
                               mb->dst.v_buffer + uv_offset,
                               mb->dst.v_buffer + uv_offset,
                               mb->dst.uv_stride, mb->dst.uv_stride,
                               mb->plane[2].eobs[n * 16]);
  }
}
static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl;
const int bhl = mb_height_log2(bsize) + 1, bh = 1 << bhl;
const int y_count = bw * bh;
int n;
// luma
for (n = 0; n < y_count; n++) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> bwl;
const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
const TX_TYPE tx_type = get_tx_type_8x8(xd,
(y_idx * (2 * bw) + x_idx) * 2);
    if (tx_type == DCT_DCT) {
      vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
                                 xd->block[0].dequant,
                                 xd->dst.y_buffer + y_offset,
                                 xd->dst.y_buffer + y_offset,
                                 xd->dst.y_stride, xd->dst.y_stride,
                                 xd->plane[0].eobs[n * 4]);
    } else {
      vp9_dequant_iht_add_8x8_c(tx_type,
                                BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
                                xd->block[0].dequant,
                                xd->dst.y_buffer + y_offset,
                                xd->dst.y_buffer + y_offset,
                                xd->dst.y_stride, xd->dst.y_stride,
                                xd->plane[0].eobs[n * 4]);
    }
  }
}
static INLINE void decode_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 1, bw = 1 << (bwl - 1);
const int bhl = mb_height_log2(bsize) + 1, bh = 1 << (bhl - 1);
const int uv_count = bw * bh;
int n;
// chroma
for (n = 0; n < uv_count; n++) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> (bwl - 1);
const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8);
    vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
                               xd->block[16].dequant,
                               xd->dst.u_buffer + uv_offset,
                               xd->dst.u_buffer + uv_offset,
                               xd->dst.uv_stride, xd->dst.uv_stride,
                               xd->plane[1].eobs[n * 4]);
    vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64),
                               xd->block[20].dequant,
                               xd->dst.v_buffer + uv_offset,
                               xd->dst.v_buffer + uv_offset,
                               xd->dst.uv_stride, xd->dst.uv_stride,
                               xd->plane[2].eobs[n * 4]);
  }
}
static INLINE void decode_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 2, bw = 1 << bwl;
const int bhl = mb_height_log2(bsize) + 2, bh = 1 << bhl;
const int y_count = bw * bh;
int n;
for (n = 0; n < y_count; n++) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> bwl;
const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4);
const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
    if (tx_type == DCT_DCT) {
      xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
                   xd->block[0].dequant,
                   xd->dst.y_buffer + y_offset,
                   xd->dst.y_buffer + y_offset,
                   xd->dst.y_stride, xd->dst.y_stride,
                   xd->plane[0].eobs[n]);
    } else {
      vp9_dequant_iht_add_c(tx_type,
                            BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
                            xd->block[0].dequant,
                            xd->dst.y_buffer + y_offset,
                            xd->dst.y_buffer + y_offset,
                            xd->dst.y_stride,
                            xd->dst.y_stride,
                            xd->plane[0].eobs[n]);
    }
  }
}
static INLINE void decode_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 2, bw = 1 << (bwl - 1);
const int bhl = mb_height_log2(bsize) + 2, bh = 1 << (bhl - 1);
const int uv_count = bw * bh;
int n;
for (n = 0; n < uv_count; n++) {
const int x_idx = n & (bw - 1);
const int y_idx = n >> (bwl - 1);
const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4);
xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16),
xd->block[16].dequant,
xd->dst.u_buffer + uv_offset,
xd->dst.u_buffer + uv_offset,
xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[n]);
xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16),
xd->block[20].dequant,
xd->dst.v_buffer + uv_offset,
xd->dst.v_buffer + uv_offset,
                 xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[n]);
  }
}
// TODO(jingning): combine luma and chroma dequantization and inverse
// transform into a single function looping over planes.
static void decode_sb_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
decode_sby_32x32(mb, bsize);
if (bsize == BLOCK_SIZE_SB64X64)
decode_sbuv_32x32(mb, bsize);
else
decode_sbuv_16x16(mb, bsize);
}
static void decode_sb_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
decode_sby_16x16(mb, bsize);
if (bsize >= BLOCK_SIZE_SB32X32)
decode_sbuv_16x16(mb, bsize);
else
decode_sbuv_8x8(mb, bsize);
}
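
// Decodes one 32x32 or 64x64 superblock: sets up filters and quantizers,
// builds the prediction, decodes tokens, then applies the inverse
// transforms for the signalled transform size.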
static void decode_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
                      BOOL_DECODER* const bc, BLOCK_SIZE_TYPE bsize) {
  const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
  const int bw = 1 << bwl, bh = 1 << bhl;
  int n, eobtotal;
  VP9_COMMON *const pc = &pbi->common;
  MODE_INFO *mi = xd->mode_info_context;
  const int mis = pc->mode_info_stride;

  assert(mi->mbmi.sb_type == bsize);

  if (pbi->common.frame_type != KEY_FRAME)
    vp9_setup_interp_filters(xd, mi->mbmi.interp_filter, pc);

  // re-initialize macroblock dequantizer before detokenization
  if (xd->segmentation_enabled)
    mb_init_dequantizer(pbi, xd);

  if (mi->mbmi.mb_skip_coeff) {
    vp9_reset_sb_tokens_context(xd, bsize);

    // Special case: Force the loopfilter to skip when eobtotal and
    // mb_skip_coeff are zero.
    skip_recon_mb(pbi, xd, mb_row, mb_col);
    return;
  }

  // TODO(jingning): need to combine intra/inter predictor functions and
  // make them block size independent.
  // generate prediction
  if (bsize == BLOCK_SIZE_SB64X64) {
    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
      vp9_build_intra_predictors_sb64y_s(xd);
      vp9_build_intra_predictors_sb64uv_s(xd);
    } else {
      vp9_build_inter64x64_predictors_sb(xd, mb_row, mb_col);
    }
  } else {
    assert(bsize == BLOCK_SIZE_SB32X32);
    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
      vp9_build_intra_predictors_sby_s(xd);
      vp9_build_intra_predictors_sbuv_s(xd);
    } else {
      vp9_build_inter32x32_predictors_sb(xd, mb_row, mb_col);
    }
  }

  // dequantization and idct
  eobtotal = vp9_decode_tokens(pbi, xd, bc, bsize);
  if (eobtotal == 0) {  // skip loopfilter
    for (n = 0; n < bw * bh; n++) {
      const int x_idx = n & (bw - 1), y_idx = n >> bwl;

      if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows)
        mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
    }
  } else {
    switch (xd->mode_info_context->mbmi.txfm_size) {
      case TX_32X32:
        decode_sb_32x32(xd, bsize);
        break;
      case TX_16X16:
        decode_sb_16x16(xd, bsize);
        break;
      case TX_8X8:
        decode_sby_8x8(xd, bsize);
        decode_sbuv_8x8(xd, bsize);
        break;
      case TX_4X4:
        decode_sby_4x4(xd, bsize);
        decode_sbuv_4x4(xd, bsize);
        break;
      default: assert(0);
    }
  }
#if CONFIG_CODE_NONZEROCOUNT
  propagate_nzcs(&pbi->common, xd);
#endif
}
// TODO(jingning): Need to merge SB and MB decoding. The MB decoding currently
// requires special handling of the I8X8, B_PRED, and SPLITMV modes.
static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
                      int mb_row, int mb_col,
                      BOOL_DECODER* const bc) {
  int eobtotal = 0;
  const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
  const int tx_size = xd->mode_info_context->mbmi.txfm_size;

  assert(!xd->mode_info_context->mbmi.sb_type);

  // re-initialize macroblock dequantizer before detokenization
  if (xd->segmentation_enabled)
    mb_init_dequantizer(pbi, xd);

  if (xd->mode_info_context->mbmi.mb_skip_coeff) {
    vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
  } else {
#if CONFIG_NEWBINTRAMODES
    if (mode != B_PRED)
#endif
      eobtotal = vp9_decode_tokens(pbi, xd, bc, BLOCK_SIZE_MB16X16);
  }

  //mode = xd->mode_info_context->mbmi.mode;
  if (pbi->common.frame_type != KEY_FRAME)
    vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter,
                             &pbi->common);

  if (eobtotal == 0 &&
      mode != B_PRED &&
      mode != SPLITMV &&
      mode != I8X8_PRED &&
      !bool_error(bc)) {
    // Special case: Force the loopfilter to skip when eobtotal and
    // mb_skip_coeff are zero.
    xd->mode_info_context->mbmi.mb_skip_coeff = 1;
    skip_recon_mb(pbi, xd, mb_row, mb_col);
    return;
  }
#ifdef DEC_DEBUG
  if (dec_debug)
    printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size);
#endif

  // moved to be performed before detokenization
  //   if (xd->segmentation_enabled)
  //     mb_init_dequantizer(pbi, xd);

  // do prediction
  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
    if (mode != I8X8_PRED) {
      vp9_build_intra_predictors_mbuv_s(xd);
      if (mode != B_PRED)
        vp9_build_intra_predictors_mby_s(xd);
    }
  } else {
#ifdef DEC_DEBUG
    if (dec_debug)
      printf("Decoding mb: %d %d interp %d\n",
             xd->mode_info_context->mbmi.mode, tx_size,
             xd->mode_info_context->mbmi.interp_filter);
#endif
    vp9_build_inter_predictors_mb_s(xd, mb_row, mb_col);
  }

  if (tx_size == TX_16X16) {
    decode_16x16(pbi, xd, bc);
  } else if (tx_size == TX_8X8) {
    decode_8x8(pbi, xd, bc);
  } else {
    decode_4x4(pbi, xd, bc);
  }

#ifdef DEC_DEBUG
  if (dec_debug) {
    int i, j;
    printf("\n");
    printf("predictor y\n");
    for (i = 0; i < 16; i++) {
      for (j = 0; j < 16; j++)
        printf("%3d ", xd->predictor[i * 16 + j]);
      printf("\n");
    }
    printf("\n");
    printf("final y\n");
    for (i = 0; i < 16; i++) {
      for (j = 0; j < 16; j++)
        printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
      printf("\n");
    }
    printf("\n");
    printf("final u\n");
    for (i = 0; i < 8; i++) {
      for (j = 0; j < 8; j++)
        printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
      printf("\n");
    }
    printf("\n");
    printf("final v\n");
    for (i = 0; i < 8; i++) {
      for (j = 0; j < 8; j++)
        printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
      printf("\n");
    }
    fflush(stdout);
  }
#endif
}
static int get_delta_q(vp9_reader *r, int *dq) {
  const int old_value = *dq;

  if (vp9_read_bit(r)) {  // Update bit
    int value = vp9_read_literal(r, 4);

    if (vp9_read_bit(r))  // Sign bit
      value = -value;
    *dq = value;
  }

  // Trigger a quantizer update if the delta-q value has changed
  return old_value != *dq;
}
#ifdef PACKET_TESTING
#include <stdio.h>
FILE *vpxlog = 0;
#endif
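
// Points the macroblock descriptor at the mode info, entropy contexts and
// reconstruction buffers for the block at (mb_row, mb_col).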
static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize,
                        int mb_row, int mb_col) {
const int bh = 1 << mb_height_log2(bsize);
const int bw = 1 << mb_width_log2(bsize);
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
const int mb_idx = mb_row * cm->mode_info_stride + mb_col;
const YV12_BUFFER_CONFIG *dst_fb = &cm->yv12_fb[cm->new_fb_idx];
const int recon_yoffset = (16 * mb_row) * dst_fb->y_stride + (16 * mb_col);
const int recon_uvoffset = (8 * mb_row) * dst_fb->uv_stride + (8 * mb_col);
xd->mode_info_context = cm->mi + mb_idx;
xd->mode_info_context->mbmi.sb_type = bsize;
xd->prev_mode_info_context = cm->prev_mi + mb_idx;
xd->above_context = cm->above_context + mb_col;
xd->left_context = cm->left_context + mb_row % 4;
// Distance of Mb to the various image edges. These are specified to 8th pel
// as they are always compared to values that are in 1/8th pel units
set_mb_row(cm, xd, mb_row, bh);
set_mb_col(cm, xd, mb_col, bw);
xd->dst.y_buffer = dst_fb->y_buffer + recon_yoffset;
xd->dst.u_buffer = dst_fb->u_buffer + recon_uvoffset;
  xd->dst.v_buffer = dst_fb->v_buffer + recon_uvoffset;
}
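
// Selects the reference frame buffer(s) and scale factors for an inter MB
// and accumulates buffer corruption state.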
static void set_refs(VP9D_COMP *pbi, int mb_row, int mb_col) {
  VP9_COMMON *const cm = &pbi->common;
  MACROBLOCKD *const xd = &pbi->mb;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

  if (mbmi->ref_frame > INTRA_FRAME) {
    // Select the appropriate reference frame for this MB
    const int fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1];
    const YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[fb_idx];
    xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
    xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
    setup_pred_block(&xd->pre, cfg, mb_row, mb_col,
                     &xd->scale_factor[0], &xd->scale_factor_uv[0]);
    xd->corrupted |= cfg->corrupted;

    if (mbmi->second_ref_frame > INTRA_FRAME) {
      // Select the appropriate reference frame for this MB
      const int second_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
      const YV12_BUFFER_CONFIG *second_cfg = &cm->yv12_fb[second_fb_idx];
      xd->scale_factor[1] = cm->active_ref_scale[mbmi->second_ref_frame - 1];
      xd->scale_factor_uv[1] =
          cm->active_ref_scale[mbmi->second_ref_frame - 1];
      setup_pred_block(&xd->second_pre, second_cfg, mb_row, mb_col,
                       &xd->scale_factor[1], &xd->scale_factor_uv[1]);
      xd->corrupted |= second_cfg->corrupted;
    }
  }
}
/* Decode a row of Superblocks (2x2 region of MBs) */
static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) {
VP9_COMMON *const pc = &pbi->common;
  MACROBLOCKD *const xd = &pbi->mb;
  int mb_col;

// For a SB there are 2 left contexts, each pertaining to a MB row within
vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
for (mb_col = pc->cur_tile_mb_col_start;
mb_col < pc->cur_tile_mb_col_end; mb_col += 4) {
if (vp9_read(r, pc->prob_sb64_coded)) {
// SB64 decoding
set_offsets(pbi, BLOCK_SIZE_SB64X64, mb_row, mb_col);
vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, r);
set_refs(pbi, mb_row, mb_col);
      decode_sb(pbi, xd, mb_row, mb_col, r, BLOCK_SIZE_SB64X64);
      xd->corrupted |= bool_error(r);
    } else {
      // not a SB64; try the four SB32s within it
      int j;

      for (j = 0; j < 4; j++) {
const int x_idx_sb = mb_col + 2 * (j % 2);
const int y_idx_sb = mb_row + 2 * (j / 2);
if (y_idx_sb >= pc->mb_rows || x_idx_sb >= pc->mb_cols)
continue; // MB lies outside frame, skip on to next
if (vp9_read(r, pc->prob_sb32_coded)) {
// SB32 decoding
set_offsets(pbi, BLOCK_SIZE_SB32X32, y_idx_sb, x_idx_sb);
vp9_decode_mb_mode_mv(pbi, xd, y_idx_sb, x_idx_sb, r);
set_refs(pbi, y_idx_sb, x_idx_sb);
          decode_sb(pbi, xd, y_idx_sb, x_idx_sb, r, BLOCK_SIZE_SB32X32);
          xd->corrupted |= bool_error(r);
        } else {
          int i;

          // Process the 4 MBs within the SB in the order:
          // top-left, top-right, bottom-left, bottom-right
          for (i = 0; i < 4; i++) {
const int x_idx_mb = x_idx_sb + (i % 2);
const int y_idx_mb = y_idx_sb + (i / 2);
            if (y_idx_mb >= pc->mb_rows || x_idx_mb >= pc->mb_cols)
              continue;  // MB lies outside frame, skip on to next