Newer
Older
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_mvref_common.h"
#define INVALID_MV 0x80008000
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
DECLARE_ALIGNED(16, extern const uint8_t,
vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{DC_PRED, INTRA_FRAME, NONE},
{NEARESTMV, LAST_FRAME, NONE},
{NEARMV, LAST_FRAME, NONE},
{ZEROMV, GOLDEN_FRAME, NONE},
{NEARESTMV, GOLDEN_FRAME, NONE},
{ZEROMV, ALTREF_FRAME, NONE},
{NEARESTMV, ALTREF_FRAME, NONE},
{NEARMV, GOLDEN_FRAME, NONE},
{NEARMV, ALTREF_FRAME, NONE},
{V_PRED, INTRA_FRAME, NONE},
{H_PRED, INTRA_FRAME, NONE},
{D45_PRED, INTRA_FRAME, NONE},
{D135_PRED, INTRA_FRAME, NONE},
{D117_PRED, INTRA_FRAME, NONE},
{D153_PRED, INTRA_FRAME, NONE},
{D27_PRED, INTRA_FRAME, NONE},
{D63_PRED, INTRA_FRAME, NONE},
{TM_PRED, INTRA_FRAME, NONE},
{NEWMV, LAST_FRAME, NONE},
{NEWMV, GOLDEN_FRAME, NONE},
{NEWMV, ALTREF_FRAME, NONE},
{SPLITMV, LAST_FRAME, NONE},
{SPLITMV, GOLDEN_FRAME, NONE},
{SPLITMV, ALTREF_FRAME, NONE},
{I4X4_PRED, INTRA_FRAME, NONE},
{ZEROMV, LAST_FRAME, ALTREF_FRAME},
{NEARESTMV, LAST_FRAME, ALTREF_FRAME},
{NEARMV, LAST_FRAME, ALTREF_FRAME},
{ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
{NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
{NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size. It holds one entry per
// block size type and is applied in vp9_initialize_rd_consts() as
// thresh_mult * q * factor / 4.
// The factors here are << 2, i.e. scaled by 4 (2 = x0.5, 4 = x1, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
{2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
TX_SIZE t;
for (t = TX_4X4; t <= TX_32X32; t++)
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
vp9_prob probs[ENTROPY_NODES];
vp9_model_to_full_probs(p[t][i][j][k][l], probs);
vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
vp9_coef_tree);
#if CONFIG_BALANCED_COEFTREE
// Replace the eob node prob with a very small value so that the
// cost approximately equals the cost without the eob node
probs[1] = 1;
vp9_cost_tokens((int *)c[t][i][j][1][k][l], probs, vp9_coef_tree);
vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
vp9_coef_tree);
assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
}
// Boost applied to cpi->RDMULT for non-key frames in pass 2, indexed by
// cpi->twopass.next_iiratio (values above 31 use the last entry).
// Entries are in 1/16 units: RDMULT += (RDMULT * rd_iifactor[idx]) >> 4.
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, };
/* SAD-per-bit lookup tables, one entry per quantizer index.
 * Values are now correlated to the quantizer: vp9_init_me_luts() fills them
 * from vp9_convert_qindex_to_q(), and vp9_initialize_me_consts() copies the
 * entry for the current qindex into cpi->mb.sadperbit16 / sadperbit4. */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];
// Fills sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer index.
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    // The same q value feeds both tables, so convert the index only once.
    const double q = vp9_convert_qindex_to_q(i);

    sad_per_bit16lut[i] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
static int compute_rd_mult(int qindex) {
const int q = vp9_dc_quant(qindex, 0);
// Loads the motion-estimation SAD-per-bit constants for the given quantizer
// index from the tables filled by vp9_init_me_luts().
// qindex is used directly as a table index and is not range-checked here.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
vp9_clear_system_state(); // __asm emms;
// Further tests required to see if optimum is different
// for key frames, golden frames and arf frames.
// if (cpi->common.refresh_golden_frame ||
// cpi->common.refresh_alt_ref_frame)
qindex = clamp(qindex, 0, MAXQ);
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
if (cpi->twopass.next_iiratio > 31)
cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
else
cpi->RDMULT +=
(cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
cpi->mb.errorperbit = cpi->RDMULT >> 6;
vp9_set_speed_features(cpi);
q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
q <<= 2;
if (cpi->RDMULT > 1000) {
cpi->RDDIV = 1;
cpi->RDMULT /= 100;
for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
for (i = 0; i < MAX_MODES; ++i) {
// Threshold here seems unnecessarily harsh but is fine given the actual
// range of values used for cpi->sf.thresh_mult[]
int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
// *4 relates to the scaling of rd_thresh_block_size_factor[]
if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
cpi->rd_threshes[bsize][i] =
cpi->sf.thresh_mult[i] * q *
rd_thresh_block_size_factor[bsize] / (4 * 100);
} else {
cpi->rd_threshes[bsize][i] = INT_MAX;
}
cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
if (cpi->sf.adaptive_rd_thresh)
cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
else
cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
for (i = 0; i < MAX_MODES; i++) {
// Threshold here seems unnecessarily harsh but is fine given the actual
// range of values used for cpi->sf.thresh_mult[]
int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
if (cpi->sf.thresh_mult[i] < thresh_max) {
cpi->rd_threshes[bsize][i] =
cpi->sf.thresh_mult[i] * q *
rd_thresh_block_size_factor[bsize] / 4;
} else {
cpi->rd_threshes[bsize][i] = INT_MAX;
}
cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
if (cpi->sf.adaptive_rd_thresh)
cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
else
cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);
for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
vp9_cost_tokens(cpi->mb.partition_cost[i],
cpi->common.fc.partition_prob[cpi->common.frame_type][i],
vp9_partition_tree);
vp9_init_mode_costs(cpi);
if (cpi->common.frame_type != KEY_FRAME) {
vp9_build_nmv_cost_table(
cpi->mb.nmvjointcost,
cpi->mb.e_mbd.allow_high_precision_mv ?
cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
&cpi->common.fc.nmvc,
cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
MB_PREDICTION_MODE m;
for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
cost_token(vp9_sb_mv_ref_tree,
cpi->common.fc.inter_mode_probs[i],
vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
}
// Maps a (width, height) index pair to a block size type via
// bsize_from_dim_lookup. Callers in this file pass the results of
// plane_block_width_log2by4() / plane_block_height_log2by4().
static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
  return bsize_from_dim_lookup[bwl][bhl];
}
// Returns the block size type covered by `bsize` within plane `pd`, using
// the plane's width/height as reported by plane_block_*_log2by4().
static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
                                            struct macroblockd_plane *pd) {
  return get_block_size(plane_block_width_log2by4(bsize, pd),
                        plane_block_height_log2by4(bsize, pd));
}
// Linearly interpolates two equally sized tables at the same position.
//
// x        - position to evaluate; callers pass non-negative values.
// ntab     - number of entries in each table.
// inv_step - table samples per unit of x (table index = x * inv_step).
// tab1/2   - the two tables.
// v1/v2    - receive the interpolated values from tab1 and tab2.
//
// Positions at or beyond the last entry are clamped to the last entry.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  double y = x * inv_step;
  int d = (int) y;
  if (d >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    // Fractional distance between entries d and d + 1.
    double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}
// Evaluates the normalized rate (*R) and normalized distortion (*D) models
// at x by linear interpolation of two precomputed tables.
static void model_rd_norm(double x, double *R, double *D) {
  // Table index is x * inv_step; both tables must hold exactly num_entries
  // values.
  static const int inv_step = 8;
  static const int num_entries = 120;

  // Normalized rate.
  // Models the rate for a Laplacian source with given variance when
  // quantized with a uniform quantizer with given stepsize. The closed form
  // expression is:
  //   Rn(x) = H(sqrt(r)) + sqrt(r) * [1 + H(r) / (1 - r)],
  // where r = exp(-sqrt(2) * x), x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double norm_rate[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };

  // Normalized distortion.
  // Models the normalized distortion for a Laplacian source with given
  // variance when quantized with a uniform quantizer with given stepsize.
  // The closed form expression is:
  //   Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)),
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const double norm_dist[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  linear_interpolate2(x, num_entries, inv_step, norm_rate, norm_dist, R, D);
}
// Models rate and distortion for a Laplacian source with the given variance
// when quantized with a uniform quantizer of the given stepsize. The closed
// form expressions are in:
//   Hang and Chen, "Source Model for transform video coder and its
//   application - Part I: Fundamental Theory", IEEE Trans. Circ.
//   Sys. for Video Tech., April 1997.
//
// var   - summed variance (or sse) over the block.
// n     - number of samples in the block.
// qstep - quantizer step size.
// rate  - receives n * 256 * Rn, rounded (0 when var or n is 0).
// dist  - receives var * Dn, rounded (0 when var or n is 0).
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();

  if (var != 0 && n != 0) {
    const double per_sample_var = (double)var / n;
    const double norm_step = qstep / sqrt(per_sample_var);
    double norm_dist, norm_rate;

    model_rd_norm(norm_step, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int64_t)(var * norm_dist + 0.5);
  } else {
    // Nothing to code: an empty or perfectly predicted block.
    *rate = 0;
    *dist = 0;
  }

  vp9_clear_system_state();
}
// Estimates rate and distortion for a block from the prediction error of
// every plane, using the Laplacian model instead of a real transform.
//
// out_rate_sum - receives the summed model rate over all planes.
// out_dist_sum - receives the summed model distortion over all planes,
//                shifted left by 4.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int rate_sum = 0;
  // Accumulate in 64 bits: per-plane distortions are int64_t and the final
  // << 4 overflows a 32-bit int for large, high-error blocks.
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];

    // TODO(dkovalev) the same code in get_plane_block_size
    const int bwl = plane_block_width_log2by4(bsize, pd);
    const int bhl = plane_block_height_log2by4(bsize, pd);
    const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
    unsigned int sse;
    int rate;
    int64_t dist;

    // Only the sse output is needed; the variance return value is unused.
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}
// Luma-only variant of the model-based rate/distortion estimate: measures
// the prediction sse of plane 0 and feeds it to the Laplacian model.
//
// out_rate_sum - receives the model rate for the luma plane.
// out_dist_sum - receives the model distortion, shifted left by 4.
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum) {
  // Transform coefficients are 8 times those of an orthogonal transform, so
  // the quantizer step is scaled by 8 as well. Divide it by 8 (>> 3) to get
  // the effective step expected by the modeling function.
  struct macroblock_plane *const src_plane = &x->plane[0];
  struct macroblockd_plane *const dst_plane = &xd->plane[0];

  // TODO(dkovalev) the same code in get_plane_block_size
  const int width_log2 = plane_block_width_log2by4(bsize, dst_plane);
  const int height_log2 = plane_block_height_log2by4(bsize, dst_plane);
  const BLOCK_SIZE_TYPE plane_bsize = get_block_size(width_log2, height_log2);
  const int num_samples = 16 << (width_log2 + height_log2);
  const int eff_qstep = dst_plane->dequant[1] >> 3;

  unsigned int pred_sse;
  int model_rate;
  int64_t model_dist;

  // Only the sse output is used; the returned variance is discarded.
  (void) cpi->fn_ptr[plane_bsize].vf(src_plane->src.buf,
                                     src_plane->src.stride,
                                     dst_plane->dst.buf,
                                     dst_plane->dst.stride, &pred_sse);

  // sse works better than var, since there is no dc prediction used
  model_rd_from_var_lapndz(pred_sse, num_samples, eff_qstep,
                           &model_rate, &model_dist);

  *out_rate_sum = model_rate;
  *out_dist_sum = model_dist << 4;
}
// Model-based luma rate/distortion estimate for a specific transform size.
// The luma block is tiled into tx_size squares and the Laplacian model is
// evaluated on the prediction sse of each tile.
//
// out_rate_sum - receives the summed model rate of all tiles.
// out_dist_sum - receives the summed model distortion, shifted left by 4.
// out_skip     - receives 1 only if every tile's model rate is below 1024.
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  // Tile edge in pixels and tile loop offsets. t defaults to the 4x4 case so
  // the loops below still advance if the assert(0) branch is compiled out.
  int t = 4, j, k;
  BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);
  const int bw = 4 << bwl;
  const int bh = 4 << bhl;
  int rate_sum = 0;
  int64_t dist_sum = 0;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
    t = 4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
    t = 8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
    t = 16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
    t = 32;
  } else {
    assert(0);
  }
  // The transform tile must not be larger than the block being modeled.
  assert(bs <= get_block_size(bwl, bhl));

  *out_skip = 1;
  for (j = 0; j < bh; j += t) {
    for (k = 0; k < bw; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;

      // Only the sse output is used; the returned variance is discarded.
      (void) cpi->fn_ptr[bs].vf(p->src.buf + j * p->src.stride + k,
                                p->src.stride,
                                pd->dst.buf + j * pd->dst.stride + k,
                                pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                               &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = (dist_sum << 4);
}
// Computes the squared error between original and dequantized coefficients.
//
// coeff      - original transform coefficients.
// dqcoeff    - dequantized coefficients.
// block_size - number of coefficients to compare.
// ssz        - receives the sum of squares of coeff[] (the error that would
//              result if every coefficient were quantized to zero).
//
// Returns the sum of squared differences between coeff[] and dqcoeff[].
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    // The unsigned casts keep the 32-bit products well defined: the largest
    // possible square, 65535 * 65535, does not fit in a signed int.
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned) coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
int plane, int block, PLANE_TYPE type,
ENTROPY_CONTEXT *A,
ENTROPY_CONTEXT *L,
MACROBLOCKD *const xd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt;
const int eob = xd->plane[plane].eobs[block];
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
uint8_t token_cache[1024];
// Check for consistency of tx_size with mode info
assert((!type && !plane) || (type && plane));
if (type == PLANE_TYPE_Y_WITH_DC) {
assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
} else {
assert(tx_size == get_uv_tx_size(mbmi));
tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_4x4(xd, block) : DCT_DCT;
above_ec = A[0] != 0;
left_ec = L[0] != 0;
seg_eob = 16;
scan = get_scan_4x4(tx_type);
case TX_8X8: {
const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
get_tx_type_8x8(xd) : DCT_DCT;
above_ec = (A[0] + A[1]) != 0;
left_ec = (L[0] + L[1]) != 0;
scan = get_scan_8x8(tx_type);
seg_eob = 64;
}
case TX_16X16: {
const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
get_tx_type_16x16(xd) : DCT_DCT;
scan = get_scan_16x16(tx_type);
seg_eob = 256;
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
}
seg_eob = 1024;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
pt = combine_entropy_contexts(above_ec, left_ec);
nb = vp9_get_coef_neighbors_handle(scan);
if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP))
/* sanity check to ensure that we do not have spurious non-zero q values */
if (eob < seg_eob)
assert(qcoeff_ptr[scan[eob]] == 0);
if (eob == 0) {
// single eob token
cost += token_costs[0][0][pt][DCT_EOB_TOKEN];
} else {
int v, prev_t;
// dc token
v = qcoeff_ptr[0];
prev_t = vp9_dct_value_tokens_ptr[v].token;
cost += token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
token_cache[0] = vp9_pt_energy_class[prev_t];
// ac tokens
for (c = 1; c < eob; c++) {
const int rc = scan[c];
const int band = get_coef_band(band_translate, c);
int t;
v = qcoeff_ptr[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = get_coef_context(nb, token_cache, c);
cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
token_cache[rc] = vp9_pt_energy_class[t];
prev_t = t;
pt = get_coef_context(nb, token_cache, c);
cost += token_costs[0][get_coef_band(band_translate, c)][pt]
[DCT_EOB_TOKEN];
// is eob first coefficient;
for (pt = 0; pt < (1 << tx_size); pt++) {
A[pt] = L[pt] = c > 0;
return cost;
}
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
// State carried across the per-transform-block callbacks (dist_block,
// rate_block, block_yrd_txfm) while computing the rd cost of one plane.
struct rdcost_block_args {
// Encoder common state and the macroblock being evaluated.
VP9_COMMON *cm;
MACROBLOCK *x;
// Working copies of the above/left entropy contexts; handed to cost_coeffs()
// at the offset of the current transform block.
ENTROPY_CONTEXT t_above[16];
ENTROPY_CONTEXT t_left[16];
// Transform size under evaluation.
TX_SIZE tx_size;
// Plane block width and height; the product bw * bh is passed to
// cost_coeffs().
int bw;
int bh;
// Running totals accumulated by rate_block() and dist_block().
int rate;
int64_t dist;
int64_t sse;
// Upper bound on the rd cost; block_yrd_txfm() gives up once the running
// RDCOST exceeds it.
int64_t best_rd;
// Set once best_rd has been exceeded; later blocks are then skipped.
int skip;
};
// Per-transform-block callback that accumulates distortion and sse into the
// rdcost_block_args passed through `arg`.
//
// plane         - plane whose coefficients are measured.
// block         - block index within the plane (in units of 16 coefficients).
// bsize         - block size being evaluated (unused here).
// ss_txfrm_size - log2 of the transform's coefficient count divided by 16.
// arg           - struct rdcost_block_args *.
static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Index by the requested plane (as rate_block does) rather than always
  // reading plane 0, so the callback is also correct for chroma planes.
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  // 32x32 results are used unscaled; smaller transforms are scaled down by 4.
  const int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block, 16);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16);

  args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
  args->sse += this_sse >> shift;

  if (x->skip_encode &&
      xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Extra penalty derived from the AC quantizer step. Named so that it no
    // longer shadows the plane pointer `p`.
    const int64_t penalty = ((int64_t)pd->dequant[1] * pd->dequant[1] *
                             (1 << ss_txfrm_size)) >> shift;
    args->dist += penalty;
    args->sse += penalty;
  }
}
// Per-transform-block callback that adds the coefficient coding cost of one
// transform block to args->rate.
//
// plane         - plane the block belongs to.
// block         - block index within the plane.
// bsize         - block size being evaluated.
// ss_txfrm_size - unused here.
// arg           - struct rdcost_block_args *.
static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCKD *const xd = &rd_args->x->e_mbd;
  ENTROPY_CONTEXT *above;
  ENTROPY_CONTEXT *left;
  int col, row;
  int block_cost;

  // Locate the block so the matching above/left context entries can be used.
  txfrm_block_to_raster_xy(xd, bsize, plane, block, rd_args->tx_size * 2,
                           &col, &row);
  above = rd_args->t_above + col;
  left = rd_args->t_left + row;

  block_cost = cost_coeffs(rd_args->cm, rd_args->x, plane, block,
                           xd->plane[plane].plane_type, above, left,
                           rd_args->tx_size, rd_args->bw * rd_args->bh);
  rd_args->rate += block_cost;
}
// FIXME(jingning): need to make the rd test of chroma components consistent
// with that of luma component. this function should be deprecated afterwards.
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD * const xd = &x->e_mbd;
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
const int bw = 1 << bwl, bh = 1 << bhl;
struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
sizeof(ENTROPY_CONTEXT) * bw);
vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
sizeof(ENTROPY_CONTEXT) * bh);
foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
return args.rate;
}
// Returns the summed coefficient cost of all chroma planes (every plane
// from index 1 up to MAX_MB_PLANE) for the given block and transform size.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;  // plane 0 is luma and is costed elsewhere

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
int shift, int64_t *sse) {
struct macroblockd_plane *p = &x->e_mbd.plane[0];
const int bwl = plane_block_width_log2by4(bsize, p);
const int bhl = plane_block_height_log2by4(bsize, p);
int64_t e = vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
*sse >>= shift;
return e;
}
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
int shift, int64_t *sse) {
int64_t sum = 0, this_sse;
int plane;
*sse = 0;
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
struct macroblockd_plane *p = &x->e_mbd.plane[plane];
const int bwl = plane_block_width_log2by4(bsize, p);
const int bhl = plane_block_height_log2by4(bsize, p);
sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
*sse += this_sse;
}
*sse >>= shift;
return sum >> shift;
}
// Per-transform-block callback: transforms and quantizes one block, then
// accumulates its distortion and rate into the rdcost_block_args in `arg`.
// Once the running rd cost exceeds args->best_rd, the totals are set to
// their maximum values and all remaining blocks are skipped.
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCK *const mb = rd_args->x;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  struct encode_b_args enc_args = {rd_args->cm, mb, NULL};

  // A previous block already pushed the cost past the bound.
  if (rd_args->skip)
    return;

  if (RDCOST(mb->rdmult, mb->rddiv, rd_args->rate, rd_args->dist) >
      rd_args->best_rd) {
    rd_args->skip = 1;
    rd_args->rate = INT_MAX;
    rd_args->dist = INT64_MAX;
    rd_args->sse = INT64_MAX;
    return;
  }

  // Inter blocks only need transform + quantization; intra blocks go
  // through encode_block_intra().
  if (mbd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME)
    xform_quant(plane, block, bsize, ss_txfrm_size, &enc_args);
  else
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &enc_args);

  dist_block(plane, block, bsize, ss_txfrm_size, rd_args);
  rate_block(plane, block, bsize, ss_txfrm_size, rd_args);
}
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
const int bwl = b_width_log2(bsize) - xd->plane[0].subsampling_x;
const int bhl = b_height_log2(bsize) - xd->plane[0].subsampling_y;
const int bw = 1 << bwl, bh = 1 << bhl;
struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
xd->mode_info_context->mbmi.txfm_size = tx_size;
vpx_memcpy(&args.t_above, pd->above_context, sizeof(ENTROPY_CONTEXT) * bw);
vpx_memcpy(&args.t_left, pd->left_context, sizeof(ENTROPY_CONTEXT) * bh);
foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
*distortion = args.dist;
*rate = args.rate;
*sse = args.sse;
*skippable = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
}
// Selects the largest transform size permitted by both the block size and
// cm->txfm_mode, stores it in mbmi->txfm_size, and computes the luma
// rate/distortion for that size via super_block_yrd_for_txfm().
//
// rate, distortion, skip - outputs forwarded to super_block_yrd_for_txfm().
// sse                    - array indexed by transform size; only the entry
//                          for the chosen size is written.
// bs                     - block size being evaluated.
//
// NOTE(review): ref_best_rd is passed to super_block_yrd_for_txfm() below but
// is not declared among the parameters visible here - confirm the signature.
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skip, int64_t *sse,
BLOCK_SIZE_TYPE bs) {
// Start from TX_32X32 and step down once for blocks smaller than 32x32 and
// once more for blocks smaller than 16x16.
const TX_SIZE max_txfm_size = TX_32X32
- (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
if (max_txfm_size == TX_32X32 &&
(cm->txfm_mode == ALLOW_32X32 ||
cm->txfm_mode == TX_MODE_SELECT)) {
mbmi->txfm_size = TX_32X32;
} else if (max_txfm_size >= TX_16X16 &&
(cm->txfm_mode == ALLOW_16X16 ||
cm->txfm_mode == ALLOW_32X32 ||
cm->txfm_mode == TX_MODE_SELECT)) {
mbmi->txfm_size = TX_16X16;
} else if (cm->txfm_mode != ONLY_4X4) {
// Any mode other than ONLY_4X4 permits at least 8x8.
mbmi->txfm_size = TX_8X8;
} else {
mbmi->txfm_size = TX_4X4;
}
super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
&sse[mbmi->txfm_size], ref_best_rd, bs,
mbmi->txfm_size);
// This path always counts in bucket 0 of txfm_stepdown_count.
cpi->txfm_stepdown_count[0]++;
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
int *s, int *skip,
int64_t txfm_cache[NB_TXFM_MODES],
BLOCK_SIZE_TYPE bs) {
const TX_SIZE max_txfm_size = TX_32X32
- (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
int64_t rd[TX_SIZE_MAX_SB][2];
int n, m;
const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(cm, xd);
for (n = TX_4X4; n <= max_txfm_size; n++) {
r[n][1] = r[n][0];
if (r[n][0] == INT_MAX)
continue;
for (m = 0; m <= n - (n == max_txfm_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
r[n][1] += vp9_cost_one(tx_probs[m]);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
for (n = TX_4X4; n <= max_txfm_size; n++) {
if (d[n] == INT64_MAX) {
rd[n][0] = rd[n][1] = INT64_MAX;
continue;
}
if (s[n]) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
} else {
rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
if (max_txfm_size == TX_32X32 &&
(cm->txfm_mode == ALLOW_32X32 ||
(cm->txfm_mode == TX_MODE_SELECT &&
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]))) {
mbmi->txfm_size = TX_32X32;
} else if (max_txfm_size >= TX_16X16 &&
(cm->txfm_mode == ALLOW_16X16 ||
cm->txfm_mode == ALLOW_32X32 ||
(cm->txfm_mode == TX_MODE_SELECT &&
rd[TX_16X16][1] < rd[TX_8X8][1] &&
rd[TX_16X16][1] < rd[TX_4X4][1]))) {
} else if (cm->txfm_mode == ALLOW_8X8 ||
cm->txfm_mode == ALLOW_16X16 ||
cm->txfm_mode == ALLOW_32X32 ||
(cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
mbmi->txfm_size = TX_8X8;
} else {
mbmi->txfm_size = TX_4X4;
}
*distortion = d[mbmi->txfm_size];
*rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
*skip = s[mbmi->txfm_size];
txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
if (max_txfm_size == TX_32X32 &&
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1])
txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
else if (max_txfm_size >= TX_16X16 &&
rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
rd[TX_4X4][1] : rd[TX_8X8][1];
if (max_txfm_size == TX_32X32 &&
rd[TX_32X32][1] < rd[TX_16X16][1] &&
rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]) {
cpi->txfm_stepdown_count[0]++;
} else if (max_txfm_size >= TX_16X16 &&
rd[TX_16X16][1] < rd[TX_8X8][1] &&
rd[TX_16X16][1] < rd[TX_4X4][1]) {
cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
} else {
cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
int (*r)[2], int *rate,
int64_t *d, int64_t *distortion,
int *s, int *skip, int64_t *sse,
BLOCK_SIZE_TYPE bs,
int *model_used) {
const TX_SIZE max_txfm_size = TX_32X32
- (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
int64_t rd[TX_SIZE_MAX_SB][2];
int n, m;
int s0, s1;
double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00};
// double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00};
const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(cm, xd);
// for (n = TX_4X4; n <= max_txfm_size; n++)
// r[n][0] = (r[n][0] * scale_r[n]);
for (n = TX_4X4; n <= max_txfm_size; n++) {
r[n][1] = r[n][0];
for (m = 0; m <= n - (n == max_txfm_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
else
r[n][1] += vp9_cost_one(tx_probs[m]);
}
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
for (n = TX_4X4; n <= max_txfm_size; n++) {
if (s[n]) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
} else {
rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}