/*
 * Imported change (author: Adrian Grange):
 * Removes the old pred-filter experiment and replaces it with one built on
 * the switchable-filter framework.  When the pred-filter experiment is
 * enabled, three interpolation filters are tested during mode selection:
 * the standard 8-tap filter, a sharp 8-tap filter and a new 8-tap smoothing
 * filter.  The 6-tap filter code is preserved; with enable-6tap also set,
 * the original 6-tap filter replaces the new 8-tap smooth filter in
 * switchable mode.  The prediction filter is applied for fractional-pel
 * motion vectors; pel-aligned and intra cases are left to future patches.
 * Change-Id: I08e8cba978f2bbf3019f8413f376b8e2cd85eba4
 */
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
#define INVALID_MV 0x80008000
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
/* Speed auto-adjustment thresholds, indexed by speed setting (0..16). */
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
};
/* Order in which prediction modes are evaluated during RD mode selection:
 * {mode, first reference frame, second reference frame (NONE if single)}. */
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},

  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},

  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},

  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},

  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},

  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},

  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},

  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},

  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME },
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME, INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME, INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME, INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME, INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME, INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME, INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME, INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME, INTRA_FRAME},
#endif
};
// Precompute token bit costs from the coefficient probability tables.
// For context 0 in the higher bands, the skip-costing variant is used;
// everything else gets the plain tree costing.
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
                             int block_type_counts) {
  int type, band, ctx;

  for (type = 0; type < block_type_counts; type++) {
    for (band = 0; band < COEF_BANDS; band++) {
      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ctx++) {
        const int use_skip = (ctx == 0) &&
            ((type > 0 && band > 0) || (type == 0 && band > 1));

        if (use_skip)
          vp9_cost_tokens_skip((int *)(c[type][band][ctx]),
                               p[type][band][ctx],
                               vp9_coef_tree);
        else
          vp9_cost_tokens((int *)(c[type][band][ctx]),
                          p[type][band][ctx],
                          vp9_coef_tree);
      }
    }
  }
}
// Extra RDMULT weighting for two-pass encodes, indexed by the next frame's
// intra/inter ratio (used in vp9_initialize_rd_consts below).
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0, };

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
// SAD-per-bit lookup tables, one entry per quantizer index; populated by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];
// Populate the SAD-per-bit lookup tables from the quantizer value.
// A formulaic mapping is used for now so the tables stay in step with
// experimental changes to the quantizer tables.
void vp9_init_me_luts() {
  int q_index;

  for (q_index = 0; q_index < QINDEX_RANGE; q_index++) {
    const double q = vp9_convert_qindex_to_q(q_index);

    sad_per_bit16lut[q_index] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[q_index] = (int)((0.063 * q) + 2.742);
  }
}
// Rate-distortion multiplier derived from the DC quantizer step size;
// grows with the square of the step size.
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);

  return (11 * q * q) >> 6;
}
// Load the motion-estimation SAD-per-bit costs for the given quantizer
// index from the precomputed lookup tables.
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
  MACROBLOCK *const x = &cpi->mb;

  x->sadperbit16 = sad_per_bit16lut[QIndex];
  x->sadperbit4 = sad_per_bit4lut[QIndex];
}
// Per-frame initialization of RD constants: RDMULT/RDDIV, errorperbit,
// per-mode thresholds, coefficient token costs, mode costs and (for inter
// frames) the motion-vector cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);

  cpi->RDMULT = compute_rd_mult(QIndex);

  // Extend rate multiplier along side quantizer zbin increases
  if (cpi->zbin_over_quant > 0) {
    double oq_factor;

    // Experimental code using the same basic equation as used for Q above
    // The units of cpi->zbin_over_quant are 1/128 of Q bin size
    oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
    cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
  }

  // Two-pass: weight RDMULT by the next frame's intra/inter ratio.
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }

  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;

  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);  // keep non-zero

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;

  if (q < 8)
    q = 8;

  // Scale the per-mode speed thresholds by q; the two branches guard
  // against integer overflow of thresh_mult[i] * q.
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4],
                   cpi->common.fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4);

  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8],
                   cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8);

  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16],
                   cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16);
#if CONFIG_TX32X32
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
#endif

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}
// Sum of squared differences between the original and dequantized
// coefficients over 'block_size' entries.
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int sse = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    sse += diff * diff;
  }

  return sse;
}
// Sum of squared coefficient errors over the four luma 8x8 transform
// blocks (block indices 0, 4, 8, 12).  When 'dc' is non-zero, coefficient
// 0 of each block is excluded (presumably carried by the Y2 block —
// mirrors the 4x4 variant).
int vp9_mbblock_error_8x8_c(MACROBLOCK *mb, int dc) {
  int total = 0;
  int i;

  for (i = 0; i < 16; i += 4) {
    const BLOCK *const be = &mb->block[i];
    const BLOCKD *const bd = &mb->e_mbd.block[i];
    int sub = 0;
    int j;

    for (j = dc; j < 64; j++) {
      const int diff = be->coeff[j] - bd->dqcoeff[j];
      sub += diff * diff;
    }

    total += sub;
  }

  return total;
}
// Sum of squared coefficient errors over the 16 luma 4x4 blocks.
// When 'dc' is non-zero, coefficient 0 of each block is skipped
// (the DC term — presumably costed via the 2nd order block).
int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) {
  BLOCK *be;
  BLOCKD *bd;
  int i, j;
  int berror, error = 0;

  for (i = 0; i < 16; i++) {
    be = &mb->block[i];
    bd = &mb->e_mbd.block[i];

    berror = 0;
    for (j = dc; j < 16; j++) {
      int this_diff = be->coeff[j] - bd->dqcoeff[j];
      berror += this_diff * this_diff;
    }

    error += berror;
  }

  return error;
}
// Sum of squared coefficient errors over the chroma blocks
// (block indices 16..23 cover U and V).
int vp9_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int b;

  for (b = 16; b < 24; b++)
    total += vp9_block_error_c(mb->block[b].coeff,
                               mb->e_mbd.block[b].dqcoeff, 16);

  return total;
}
// Estimate the chroma (U + V) sum of squared error for the current
// macroblock motion vector.  The luma MV is rounded away from zero and
// halved to chroma precision before sampling the prediction.
int vp9_uvsse(MACROBLOCK *x) {
  uint8_t *uptr, *vptr;
  uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
  uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
  int uv_stride = x->block[16].src_stride;

  unsigned int sse1 = 0;
  unsigned int sse2 = 0;
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
  int offset;
  int pre_stride = x->e_mbd.block[16].pre_stride;

  // Round away from zero before halving so /2 rounds consistently for
  // positive and negative MVs.
  if (mv_row < 0)
    mv_row -= 1;
  else
    mv_row += 1;

  if (mv_col < 0)
    mv_col -= 1;
  else
    mv_col += 1;

  mv_row /= 2;
  mv_col /= 2;

  // Integer-pel offset into the chroma reference buffers.
  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
  uptr = x->e_mbd.pre.u_buffer + offset;
  vptr = x->e_mbd.pre.v_buffer + offset;

  // Sub-pel MV: use the interpolating variance; otherwise plain variance.
  if ((mv_row | mv_col) & 7) {
    vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
                              (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
    vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
                              (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
    sse2 += sse1;
  } else {
    vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
    vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
    sse2 += sse1;
  }

  return sse2;
}
#if CONFIG_NEWCOEFCONTEXT
#define PT pn
#else
#define PT pt
#endif
// Entropy cost of the coefficients of block 'b' at the given transform
// size, using the token cost tables in 'mb'.  'a' and 'l' are the
// above/left entropy contexts; on exit they are set to whether the block
// coded any coefficients (feeding context selection for neighbors).
static int cost_coeffs(MACROBLOCK *mb,
                       BLOCKD *b, PLANE_TYPE type,
                       ENTROPY_CONTEXT *a,
                       ENTROPY_CONTEXT *l,
                       TX_SIZE tx_size) {
  int pt;
  const int eob = b->eob;
  MACROBLOCKD *xd = &mb->e_mbd;
#if CONFIG_TX32X32
  const int ib = (int)(b - xd->block);
#endif
  int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;  // skip DC when coded in Y2
  int cost = 0, seg_eob;
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
  const int *scan, *band;
  int16_t *qcoeff_ptr = b->qcoeff;
  const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                          get_tx_type(xd, b) : DCT_DCT;
#if CONFIG_NEWCOEFCONTEXT
  const int *neighbors;
  int pn;
#endif

  ENTROPY_CONTEXT a_ec = *a, l_ec = *l;

  // Select scan order, band table and the largest possible EOB for this
  // transform size; ADST hybrids use row/col scans at 4x4.
  switch (tx_size) {
    case TX_4X4:
      scan = vp9_default_zig_zag1d_4x4;
      band = vp9_coef_bands_4x4;
      seg_eob = 16;
      if (type == PLANE_TYPE_Y_WITH_DC) {
        if (tx_type == ADST_DCT) {
          scan = vp9_row_scan_4x4;
        } else if (tx_type == DCT_ADST) {
          scan = vp9_col_scan_4x4;
        }
      }
      break;
    case TX_8X8:
      if (type == PLANE_TYPE_Y2) {
        // The 2nd order block is still a 4x4 at 8x8 transform size.
        scan = vp9_default_zig_zag1d_4x4;
        band = vp9_coef_bands_4x4;
        seg_eob = 4;
      } else {
        scan = vp9_default_zig_zag1d_8x8;
        band = vp9_coef_bands_8x8;
        seg_eob = 64;
      }
      break;
    case TX_16X16:
      scan = vp9_default_zig_zag1d_16x16;
      band = vp9_coef_bands_16x16;
      seg_eob = 256;
#if CONFIG_TX32X32
      if (type == PLANE_TYPE_UV) {
        // Chroma of a 32x32 superblock lives in the superblock
        // coefficient buffer, after the 1024 luma coefficients.
        const int uv_idx = ib - 16;
        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
      }
#endif
      break;
#if CONFIG_TX32X32
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      band = vp9_coef_bands_32x32;
      seg_eob = 1024;
      qcoeff_ptr = xd->sb_coeff_data.qcoeff;
      break;
#endif
    default:
      abort();
      break;
  }

  VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
#if CONFIG_NEWCOEFCONTEXT
  neighbors = vp9_get_coef_neighbors_handle(scan);
  pn = pt;
#endif

  // The segment may impose a tighter EOB limit.
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB))
    seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);

  if (tx_type != DCT_DCT) {
    // Hybrid (ADST) transform types use the hybrid token cost tables.
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
      int t = vp9_dct_value_tokens_ptr[v].Token;
      cost += mb->hybrid_token_costs[tx_size][type][band[c]][PT][t];
      cost += vp9_dct_value_cost_ptr[v];
      pt = vp9_prev_token_class[t];
#if CONFIG_NEWCOEFCONTEXT
      if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
        pn = vp9_get_coef_neighbor_context(
            qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
      else
        pn = pt;
#endif
    }
    // EOB token, unless the segment EOB limit was already reached.
    if (c < seg_eob)
      cost += mb->hybrid_token_costs[tx_size][type][band[c]]
              [PT][DCT_EOB_TOKEN];
  } else {
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
      int t = vp9_dct_value_tokens_ptr[v].Token;
      cost += mb->token_costs[tx_size][type][band[c]][pt][t];
      cost += vp9_dct_value_cost_ptr[v];
      pt = vp9_prev_token_class[t];
#if CONFIG_NEWCOEFCONTEXT
      if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
        pn = vp9_get_coef_neighbor_context(
            qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
      else
        pn = pt;
#endif
    }
    if (c < seg_eob)
      cost += mb->token_costs[tx_size][type][band[c]]
              [PT][DCT_EOB_TOKEN];
  }

  // is eob first coefficient;
  pt = (c > !type);
  *a = *l = pt;

  return cost;
}
// Token cost of the 16 luma 4x4 blocks (plus the Y2 block when present).
// 'backup' makes costing run on copies of the entropy contexts so the
// real contexts are not modified.
static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) {
  int cost = 0;
  int b;
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  for (b = 0; b < 16; b++)
    cost += cost_coeffs(mb, xd->block + b,
                        (has_2nd_order ?
                         PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC),
                        ta + vp9_block2above[TX_4X4][b],
                        tl + vp9_block2left[TX_4X4][b],
                        TX_4X4);

  if (has_2nd_order)
    cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
                        ta + vp9_block2above[TX_4X4][24],
                        tl + vp9_block2left[TX_4X4][24],
                        TX_4X4);

  return cost;
}
// Rate-distortion cost of the luma plane using a 4x4 transform.
// Outputs: *Rate (token cost), *Distortion (transform-domain SSE scaled
// down by 4), *skippable (all blocks quantize to zero).  'backup' selects
// whether entropy contexts are costed on copies.
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK *const mb_y2 = mb->block + 24;   // 2nd order (Y2) source block
  BLOCKD *const x_y2 = xd->block + 24;   // 2nd order (Y2) reconstruction
  int d, has_2nd_order;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  has_2nd_order = get_2nd_order_usage(xd);
  // Fdct and building the 2nd order block
  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);
  d = vp9_mbblock_error(mb, has_2nd_order);
  if (has_2nd_order)
    d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);

  // NOTE(review): >> 2 scaling convention — confirm against callers.
  *Distortion = (d >> 2);
  // rate
  *Rate = rdcost_mby_4x4(mb, has_2nd_order, backup);
  *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, has_2nd_order);
}
// Token cost of the four luma 8x8 blocks (plus the Y2 block when present).
// 'backup' makes costing run on copies of the entropy contexts.
static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) {
  int cost = 0;
  int b;
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    // Via xd (== &mb->e_mbd), matching the 4x4 and 16x16 variants.
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  for (b = 0; b < 16; b += 4)
    cost += cost_coeffs(mb, xd->block + b,
                        (has_2nd_order ?
                         PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC),
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b],
                        TX_8X8);

  if (has_2nd_order)
    cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
                        ta + vp9_block2above[TX_8X8][24],
                        tl + vp9_block2left[TX_8X8][24],
                        TX_8X8);

  return cost;
}
// Rate-distortion cost of the luma plane using an 8x8 transform.
// Outputs mirror macro_block_yrd_4x4().
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK *const mb_y2 = mb->block + 24;   // 2nd order (Y2) source block
  BLOCKD *const x_y2 = xd->block + 24;   // 2nd order (Y2) reconstruction
  int d, has_2nd_order;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);
  // NOTE(review): 2nd-order usage is queried after transform/quantize here
  // but before them in the 4x4 path — confirm the asymmetry is intended.
  has_2nd_order = get_2nd_order_usage(xd);
  d = vp9_mbblock_error_8x8_c(mb, has_2nd_order);
  if (has_2nd_order)
    d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);

  *Distortion = (d >> 2);
  // rate
  *Rate = rdcost_mby_8x8(mb, has_2nd_order, backup);
  *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, has_2nd_order);
}
// Token cost of the single luma 16x16 transform block.  'backup' makes
// costing run on copies of the entropy contexts.
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  return cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
}
// Rate-distortion cost of the luma plane using a single 16x16 transform.
// Trellis optimization is applied only for inter-style modes
// (mode < I8X8_PRED).
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
                                  int *skippable, int backup) {
  int d;
  MACROBLOCKD *xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;

  vp9_transform_mby_16x16(mb);
  vp9_quantize_mby_16x16(mb);

  // TODO(jingning) is it possible to quickly determine whether to force
  // trailing coefficients to be zero, instead of running trellis
  // optimization in the rate-distortion optimization loop?
  if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
    vp9_optimize_mby_16x16(mb);

  d = vp9_mbblock_error(mb, 0);

  *Distortion = (d >> 2);
  // rate
  *Rate = rdcost_mby_16x16(mb, backup);
  *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd);
}
// Given per-transform-size rate (r), distortion (d) and skippable (s)
// arrays, pick the best transform size under the current txfm_mode,
// store the winner in mbmi->txfm_size, output the winning rate/
// distortion/skip, and fill txfm_cache with the RD cost of each mode.
// r[n][0] is the raw rate; r[n][1] adds the transform-size signalling
// cost for TX_MODE_SELECT.
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;

  // Add the cost of signalling the transform size to r[n][1].
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  // RD cost per size, with and without the skip flag cost.
  if (cm->mb_no_coeff_skip) {
    int s0, s1;

    assert(skip_prob > 0);
    s0 = vp9_cost_bit(skip_prob, 0);
    s1 = vp9_cost_bit(skip_prob, 1);

    for (n = TX_4X4; n <= max_txfm_size; n++) {
      if (s[n]) {
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
      }
    }
  } else {
    for (n = TX_4X4; n <= max_txfm_size; n++) {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]);
    }
  }

  // Pick the transform size: the mode may force a size, or
  // TX_MODE_SELECT picks the lowest RD cost.
#if CONFIG_TX32X32
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else
#endif
  if ( cm->txfm_mode == ALLOW_16X16 ||
#if CONFIG_TX32X32
      (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
#endif
      (cm->txfm_mode == TX_MODE_SELECT &&
       rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip = s[mbmi->txfm_size];

  // Record the best achievable RD for each transform mode for later reuse.
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
#if CONFIG_TX32X32
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else
#endif
  if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}
// Evaluate luma RD at 16x16, 8x8 and 4x4 transform sizes and select the
// best via choose_txfm_size_from_rd(); txfm_cache receives the per-mode
// RD costs for later reuse.
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB];

  // Build the residual once; each *_yrd call below transforms/quantizes it.
  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);

  macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
  macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
  macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
                           txfm_cache, TX_16X16);
}
// Copy a 4x4 predictor stored with a 16-byte row stride: one 32-bit word
// (4 pixels) from the start of each of the four rows.
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
  const unsigned int *src_words = (const unsigned int *)predictor;
  unsigned int *dst_words = (unsigned int *)dst;
  int row;

  // Word indices 0, 4, 8, 12 == byte offsets 0, 16, 32, 48.
  for (row = 0; row < 4; row++)
    dst_words[row * 4] = src_words[row * 4];
}
#if CONFIG_TX32X32
// Token cost of the 32x32-transformed luma superblock.  'backup' makes
// costing run on copies of the entropy contexts.
static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
  MACROBLOCKD * const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;

  if (backup) {
    ta = (ENTROPY_CONTEXT *) &t_above;
    tl = (ENTROPY_CONTEXT *) &t_left;

    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  } else {
    ta = (ENTROPY_CONTEXT *) xd->above_context;
    tl = (ENTROPY_CONTEXT *) xd->left_context;
  }

  return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}
// Sum of squared coefficient errors over a superblock-sized coefficient
// array, accumulated in 64 bits and saturated to INT_MAX on return.
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                                int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    // Square in 64 bits: a single int16 difference can reach 65535, whose
    // square does not fit in a signed 32-bit int.  (The previous code
    // relied on the square just fitting in an unsigned 32-bit int.)
    const int this_diff = coeff[i] - dqcoeff[i];
    error += (int64_t)this_diff * this_diff;
  }

  return (int)(error > INT_MAX ? INT_MAX : error);
}
#define DEBUG_ERROR 0
// Rate/distortion/skippable for a luma superblock using one 32x32
// transform.  Residual must already be in x->sb_coeff_data.src_diff.
static void super_block_yrd_32x32(MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  int backup) {
  SUPERBLOCK * const x_sb = &x->sb_coeff_data;
  MACROBLOCKD * const xd = &x->e_mbd;
  SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
  int16_t out[1024];  // spatial-domain reconstruction for checking/hybrid
#endif

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);

#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
  vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
#endif

#if !CONFIG_DWTDCTHYBRID
  // Transform-domain distortion.
  *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
#else
  // Hybrid transform: measure distortion in the pixel domain instead.
  *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4;
#endif
#if DEBUG_ERROR
  printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
         vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
#endif
  *rate = rdcost_sby_32x32(x, backup);
  *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd);
}
#endif
// Luma RD for a 32x32 superblock: accumulate rate/distortion/skippable
// over the four 16x16 sub-macroblocks at each MB transform size (and the
// whole block at 32x32 when enabled), then pick the best transform size.
// Entropy contexts are tracked per transform size and restored on exit.
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
                            int *skip,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
                         *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
                         *orig_left = xd->left_context;

  for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
    r[n][0] = 0;
    d[n] = 0;
    s[n] = 1;
  }

#if CONFIG_TX32X32
  vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                       dst, dst_y_stride);
  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
#endif

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
  // Visit the four 16x16 sub-macroblocks in raster order.
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
    int r_tmp, d_tmp, s_tmp;

    vp9_subtract_mby_s_c(x->src_diff,
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_16X16] += d_tmp;
    r[TX_16X16][0] += r_tmp;
    s[TX_16X16] = s[TX_16X16] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_4X4] += d_tmp;
    r[TX_4X4][0] += r_tmp;
    s[TX_4X4] = s[TX_4X4] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_8X8] += d_tmp;
    r[TX_8X8][0] += r_tmp;
    s[TX_8X8] = s[TX_8X8] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
  }
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);

  xd->above_context = orig_above;
  xd->left_context = orig_left;
}
#if CONFIG_SUPERBLOCKS64
// Luma RD for a 64x64 superblock: like super_block_yrd(), but over sixteen
// 16x16 sub-macroblocks (and four 32x32 quadrants when CONFIG_TX32X32).
static void super_block_64_yrd(VP9_COMP *cpi,
                               MACROBLOCK *x, int *rate, int *distortion,
                               int *skip,
                               int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
                         *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
                         *orig_left = xd->left_context;

  for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
    r[n][0] = 0;
    d[n] = 0;
    s[n] = 1;
  }

#if CONFIG_TX32X32
  // Four 32x32 quadrants.
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
    int r_tmp, d_tmp, s_tmp;

    xd->above_context = &t_above[TX_32X32][x_idx << 1];
    xd->left_context = &t_left[TX_32X32][y_idx << 1];
    vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
                         src + 32 * x_idx + 32 * y_idx * src_y_stride,
                         src_y_stride,
                         dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
                         dst_y_stride);
    super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
    r[TX_32X32][0] += r_tmp;
    d[TX_32X32] += d_tmp;
    s[TX_32X32] = s[TX_32X32] && s_tmp;
  }
#endif

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
  // Sixteen 16x16 sub-macroblocks in raster order.
  for (n = 0; n < 16; n++) {
    int x_idx = n & 3, y_idx = n >> 2;
    int r_tmp, d_tmp, s_tmp;

    vp9_subtract_mby_s_c(x->src_diff,
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_16X16] += d_tmp;
    r[TX_16X16][0] += r_tmp;
    s[TX_16X16] = s[TX_16X16] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_4X4] += d_tmp;
    r[TX_4X4][0] += r_tmp;
    s[TX_4X4] = s[TX_4X4] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_8X8] += d_tmp;
    r[TX_8X8][0] += r_tmp;
    s[TX_8X8] = s[TX_8X8] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
  }
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);

  xd->above_context = orig_above;
  xd->left_context = orig_left;
}
#endif  // CONFIG_SUPERBLOCKS64
// Copy the left 8x8 block out of a predictor buffer with a 16-byte stride.
// Each 8-byte row is two 32-bit words; consecutive rows are 4 words apart.
// Only the left half of each 16-byte row is copied; the rest of dst is
// left untouched. Both buffers are assumed 4-byte aligned (they come from
// DECLARE_ALIGNED_ARRAY).
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
  const unsigned int *src_w = (const unsigned int *)predictor;
  unsigned int *dst_w = (unsigned int *)dst;
  int row;

  for (row = 0; row < 8; row++) {
    dst_w[row * 4 + 0] = src_w[row * 4 + 0];
    dst_w[row * 4 + 1] = src_w[row * 4 + 1];
  }
}
// RD-search all 4x4 intra prediction modes for a single block. For each
// candidate mode the residual is transformed (DCT or hybrid transform,
// per get_tx_type_4x4), quantized and coefficient-costed; the lowest RD
// cost wins. On return, the block's bmi, the entropy contexts *a/*l, and
// the reconstruction reflect the winning mode.
// Returns the best RD cost found (LLONG_MAX if nothing was evaluated).
// NOTE: removed a garbled line-number artifact that had been fused into
// this function by a bad extraction; no code behavior is changed.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
                                     BLOCKD *b, B_PREDICTION_MODE *best_mode,
#if CONFIG_COMP_INTRA_PRED
                                     B_PREDICTION_MODE *best_second_mode,
                                     int allow_comp,
#endif
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
  B_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;

#if CONFIG_COMP_INTRA_PRED
  B_PREDICTION_MODE mode2;
#endif
  int64_t best_rd = LLONG_MAX;
  int rate = 0;
  int distortion;
  ENTROPY_CONTEXT ta = *a, tempa = *a;
  ENTROPY_CONTEXT tl = *l, templ = *l;
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
  /*
   * The predictor buffer is a 2d buffer with a stride of 16. Create
   * a temp buffer that meets the stride requirements, but we are only
   * interested in the left 4x4 block
   * */
  DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4);
  DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);

#if CONFIG_NEWBINTRAMODES
  b->bmi.as_mode.context = vp9_find_bpred_context(b);
#endif
  for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
#if CONFIG_COMP_INTRA_PRED
    for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1));
         mode2 != (allow_comp ? (mode + 1) : 0); mode2++) {
#endif
      int64_t this_rd;
      int ratey;

#if CONFIG_NEWBINTRAMODES
      // B_CONTEXT_PRED replacement modes are only valid on key frames;
      // B_CONTEXT_PRED itself only on inter frames.
      if (xd->frame_type == KEY_FRAME) {
        if (mode == B_CONTEXT_PRED) continue;
#if CONFIG_COMP_INTRA_PRED
        if (mode2 == B_CONTEXT_PRED) continue;
#endif
      } else {
        if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
            mode < B_CONTEXT_PRED)
          continue;
#if CONFIG_COMP_INTRA_PRED
        if (mode2 >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
            mode2 < B_CONTEXT_PRED)
          continue;
#endif
      }
#endif

      b->bmi.as_mode.first = mode;
#if CONFIG_NEWBINTRAMODES
      rate = bmode_costs[
          mode == B_CONTEXT_PRED ? mode - CONTEXT_PRED_REPLACEMENTS : mode];
#else
      rate = bmode_costs[mode];
#endif

#if CONFIG_COMP_INTRA_PRED
      if (mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
#endif
        vp9_intra4x4_predict(b, mode, b->predictor);
#if CONFIG_COMP_INTRA_PRED
      } else {
        vp9_comp_intra4x4_predict(b, mode, mode2, b->predictor);
#if CONFIG_NEWBINTRAMODES
        rate += bmode_costs[
            mode2 == B_CONTEXT_PRED ?
                mode2 - CONTEXT_PRED_REPLACEMENTS : mode2];
#else
        rate += bmode_costs[mode2];
#endif
      }
#endif

      vp9_subtract_b(be, b, 16);

      b->bmi.as_mode.first = mode;
      tx_type = get_tx_type_4x4(xd, b);
      if (tx_type != DCT_DCT) {
        vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
        vp9_ht_quantize_b_4x4(be, b, tx_type);
      } else {
        x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
        x->quantize_b_4x4(be, b);
      }

      // Cost against fresh copies of the entry contexts so each candidate
      // mode is evaluated from the same starting state.
      tempa = ta;
      templ = tl;

      ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
      rate += ratey;
      distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        best_tx_type = tx_type;

#if CONFIG_COMP_INTRA_PRED
        *best_second_mode = mode2;
#endif
        *a = tempa;
        *l = templ;
        copy_predictor(best_predictor, b->predictor);
        vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
      }
#if CONFIG_COMP_INTRA_PRED
    }
#endif
  }
  b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);
#if CONFIG_COMP_INTRA_PRED
  b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode);
#endif

  // inverse transform of the winning mode's coefficients, then reconstruct
  if (best_tx_type != DCT_DCT)
    vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
  else
    xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);

  vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);

  return best_rd;
}
// RD-search 4x4 intra modes for all 16 luma blocks of a macroblock.
// Accumulates the per-block rate/distortion, writes the chosen mode of each
// block into the mode info, and aborts early once the running RD total
// reaches best_rd. Returns the total RD cost, or LLONG_MAX on early abort.
// NOTE: removed two garbled line-number artifacts fused into this function
// by a bad extraction; no code behavior is changed.
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate,
                                         int *rate_y, int *Distortion, int64_t best_rd,
#if CONFIG_COMP_INTRA_PRED
                                         int allow_comp,
#endif
                                         int update_contexts) {
  int i;
  MACROBLOCKD *const xd = &mb->e_mbd;
  int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;
  int *bmode_costs;

  if (update_contexts) {
    // Cost directly against (and update) the live entropy contexts.
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  } else {
    // Work on local copies so the caller's contexts stay unchanged.
    vpx_memcpy(&t_above, xd->above_context,
               sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context,
               sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  }

  xd->mode_info_context->mbmi.mode = B_PRED;
  bmode_costs = mb->inter_bmode_costs;

  for (i = 0; i < 16; i++) {
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
    B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
#if CONFIG_COMP_INTRA_PRED
    B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode);
#endif
    int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);

    if (xd->frame_type == KEY_FRAME) {
      // On key frames, mode costs are conditioned on the above/left modes.
      const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
      const B_PREDICTION_MODE L = left_block_mode(mic, i);

      bmode_costs = mb->bmode_costs[A][L];
    }
#if CONFIG_NEWBINTRAMODES
    mic->bmi[i].as_mode.context = vp9_find_bpred_context(xd->block + i);
#endif

    total_rd += rd_pick_intra4x4block(
        cpi, mb, mb->block + i, xd->block + i, &best_mode,
#if CONFIG_COMP_INTRA_PRED
        & best_second_mode, allow_comp,
#endif
        bmode_costs, ta + vp9_block2above[TX_4X4][i],
        tl + vp9_block2left[TX_4X4][i], &r, &ry, &d);

    cost += r;
    distortion += d;
    tot_rate_y += ry;

    mic->bmi[i].as_mode.first = best_mode;
#if CONFIG_COMP_INTRA_PRED
    mic->bmi[i].as_mode.second = best_second_mode;
#endif

#if 0 // CONFIG_NEWBINTRAMODES
    printf("%d %d\n", mic->bmi[i].as_mode.first, mic->bmi[i].as_mode.context);
#endif

    if (total_rd >= best_rd)
      break;
  }

  if (total_rd >= best_rd)
    return LLONG_MAX;

#if CONFIG_COMP_INTRA_PRED
  cost += vp9_cost_bit(128, allow_comp);
#endif
  *Rate = cost;
  *rate_y = tot_rate_y;
  *Distortion = distortion;

  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
// Pick the best whole-32x32 intra luma prediction mode by exhaustive RD
// search over DC_PRED..TM_PRED. Writes the winning rate/distortion/skip
// stats and fills the per-txfm-mode RD cache via super_block_yrd(); leaves
// the winning mode in the macroblock's mode info. Returns the best RD cost.
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi,
                                      MACROBLOCK *x,
                                      int *rate,
                                      int *rate_tokenonly,
                                      int *distortion,
                                      int *skippable,
                                      int64_t txfm_cache[NB_TXFM_MODES]) {
  MB_PREDICTION_MODE try_mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(winner);
  int tok_rate, total_rate;
  int dist, skip_flag;
  int64_t best_rd = LLONG_MAX;

  /* Y Search for 32x32 intra prediction mode */
  for (try_mode = DC_PRED; try_mode <= TM_PRED; try_mode++) {
    int64_t rd;

    x->e_mbd.mode_info_context->mbmi.mode = try_mode;
    vp9_build_intra_predictors_sby_s(&x->e_mbd);

    super_block_yrd(cpi, x, &tok_rate, &dist, &skip_flag, txfm_cache);
    total_rate = tok_rate +
                 x->mbmode_cost[x->e_mbd.frame_type]
                               [x->e_mbd.mode_info_context->mbmi.mode];
    rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);

    if (rd < best_rd) {
      winner = try_mode;
      best_rd = rd;
      *rate = total_rate;
      *rate_tokenonly = tok_rate;
      *distortion = dist;
      *skippable = skip_flag;
    }
  }

  x->e_mbd.mode_info_context->mbmi.mode = winner;

  return best_rd;
}
#if CONFIG_SUPERBLOCKS64
// Pick the best whole-64x64 intra luma prediction mode by exhaustive RD
// search over DC_PRED..TM_PRED; 64x64 analogue of rd_pick_intra_sby_mode.
// Returns the best RD cost.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction, and fixed the copy-pasted "32x32" comment.
static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi,
                                        MACROBLOCK *x,
                                        int *rate,
                                        int *rate_tokenonly,
                                        int *distortion,
                                        int *skippable,
                                        int64_t txfm_cache[NB_TXFM_MODES]) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  int this_rate, this_rate_tokenonly;
  int this_distortion, s;
  int64_t best_rd = INT64_MAX, this_rd;

  /* Y Search for 64x64 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    x->e_mbd.mode_info_context->mbmi.mode = mode;
    vp9_build_intra_predictors_sb64y_s(&x->e_mbd);

    super_block_64_yrd(cpi, x, &this_rate_tokenonly,
                       &this_distortion, &s, txfm_cache);
    this_rate = this_rate_tokenonly +
                x->mbmode_cost[x->e_mbd.frame_type]
                              [x->e_mbd.mode_info_context->mbmi.mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;

  return best_rd;
}
#endif  // CONFIG_SUPERBLOCKS64
// RD-search 16x16 intra prediction modes for a macroblock's luma. Tracks the
// best mode (and the transform size chosen for it) and maintains txfm_cache[]
// with the best achievable RD cost under each transform-size policy.
// Returns the best RD cost.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; no code behavior is changed.
static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi,
                                          MACROBLOCK *x,
                                          int *Rate,
                                          int *rate_y,
                                          int *Distortion,
                                          int *skippable,
                                          int64_t txfm_cache[NB_TXFM_MODES]) {
  MB_PREDICTION_MODE mode;
  TX_SIZE txfm_size = 0;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
#if CONFIG_COMP_INTRA_PRED
  MB_PREDICTION_MODE mode2;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected);
#endif
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  int rate, ratey;
  int distortion, skip;
  int64_t best_rd = LLONG_MAX;
  int64_t this_rd;
  int i;

  for (i = 0; i < NB_TXFM_MODES; i++)
    txfm_cache[i] = LLONG_MAX;

  // Y Search for 16x16 intra prediction mode
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_txfm_cache[NB_TXFM_MODES];

    mbmi->mode = mode;

#if CONFIG_COMP_INTRA_PRED
    for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
      mbmi->second_mode = mode2;
      if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
#endif
        vp9_build_intra_predictors_mby(xd);
#if CONFIG_COMP_INTRA_PRED
      } else {
        continue; // i.e. disable for now
        vp9_build_comp_intra_predictors_mby(xd);
      }
#endif

      macro_block_yrd(cpi, x, &ratey, &distortion, &skip, local_txfm_cache);

      // FIXME add compoundmode cost
      // FIXME add rate for mode2
      rate = ratey + x->mbmode_cost[xd->frame_type][mbmi->mode];

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        mode_selected = mode;
        txfm_size = mbmi->txfm_size;
#if CONFIG_COMP_INTRA_PRED
        mode2_selected = mode2;
#endif
        best_rd = this_rd;
        *Rate = rate;
        *rate_y = ratey;
        *Distortion = distortion;
        *skippable = skip;
      }

      // Keep a running best for each transform-size policy so the caller
      // can compare policies without re-running the search.
      for (i = 0; i < NB_TXFM_MODES; i++) {
        int64_t adj_rd = this_rd + local_txfm_cache[i] -
                         local_txfm_cache[cpi->common.txfm_mode];
        if (adj_rd < txfm_cache[i]) {
          txfm_cache[i] = adj_rd;
        }
      }

#if CONFIG_COMP_INTRA_PRED
    }
#endif
  }

  mbmi->txfm_size = txfm_size;
  mbmi->mode = mode_selected;
#if CONFIG_COMP_INTRA_PRED
  mbmi->second_mode = mode2_selected;
#endif

  return best_rd;
}
// RD-search intra prediction modes for one 8x8 sub-block (index ib) of a
// macroblock. Depending on the current txfm_size, the residual is coded
// either with one 8x8 transform or four 4x4 transforms. Writes the winning
// mode, updates the entropy contexts a[]/l[], and re-encodes the block via
// vp9_encode_intra8x8(). Returns the best RD cost.
// NOTE: removed two garbled line-number artifacts fused into this function
// by a bad extraction; no code behavior is changed.
static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
#if CONFIG_COMP_INTRA_PRED
                                     B_PREDICTION_MODE *best_second_mode,
#endif
                                     int *mode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
  MB_PREDICTION_MODE mode;
#if CONFIG_COMP_INTRA_PRED
  MB_PREDICTION_MODE mode2;
#endif
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = LLONG_MAX;
  int distortion = 0, rate = 0;
  BLOCK *be = x->block + ib;
  BLOCKD *b = xd->block + ib;
  ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0;
  ENTROPY_CONTEXT tl0, tl1, bestl0 = 0, bestl1 = 0;

  /*
   * The predictor buffer is a 2d buffer with a stride of 16. Create
   * a temp buffer that meets the stride requirements, but we are only
   * interested in the left 8x8 block
   * */
  DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 8);
  DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16 * 4);

  // perform transformation of dimension 8x8
  // note the input and output index mapping
  int idx = (ib & 0x02) ? (ib + 2) : ib;

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
#if CONFIG_COMP_INTRA_PRED
    for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
#endif
      int64_t this_rd;
      int rate_t = 0;

      // FIXME rate for compound mode and second intrapred mode
      rate = mode_costs[mode];
      b->bmi.as_mode.first = mode;

#if CONFIG_COMP_INTRA_PRED
      if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
#endif
        vp9_intra8x8_predict(b, mode, b->predictor);
#if CONFIG_COMP_INTRA_PRED
      } else {
        continue; // i.e. disable for now
        vp9_comp_intra8x8_predict(b, mode, mode2, b->predictor);
      }
#endif

      vp9_subtract_4b_c(be, b, 16);

      assert(get_2nd_order_usage(xd) == 0);
      if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
        TX_TYPE tx_type = get_tx_type_8x8(xd, b);
        if (tx_type != DCT_DCT)
          vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
        else
          x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
        x->quantize_b_8x8(x->block + idx, xd->block + idx);

        // compute quantization mse of 8x8 block
        distortion = vp9_block_error_c((x->block + idx)->coeff,
                                       (xd->block + idx)->dqcoeff, 64);
        ta0 = a[vp9_block2above[TX_8X8][idx]];
        tl0 = l[vp9_block2left[TX_8X8][idx]];

        rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
                             &ta0, &tl0, TX_8X8);

        rate += rate_t;
        ta1 = ta0;
        tl1 = tl0;
      } else {
        static const int iblock[4] = {0, 1, 4, 5};
        TX_TYPE tx_type;
        int i;
        ta0 = a[vp9_block2above[TX_4X4][ib]];
        ta1 = a[vp9_block2above[TX_4X4][ib + 1]];
        tl0 = l[vp9_block2left[TX_4X4][ib]];
        tl1 = l[vp9_block2left[TX_4X4][ib + 4]];
        distortion = 0;
        rate_t = 0;
        // NOTE(review): b/be are re-pointed at the four 4x4 sub-blocks here,
        // so after this loop they reference the last sub-block — the
        // best-candidate copies below rely on that original behavior.
        for (i = 0; i < 4; ++i) {
          b = &xd->block[ib + iblock[i]];
          be = &x->block[ib + iblock[i]];
          tx_type = get_tx_type_4x4(xd, b);
          if (tx_type != DCT_DCT) {
            vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
            vp9_ht_quantize_b_4x4(be, b, tx_type);
          } else {
            x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
            x->quantize_b_4x4(be, b);
          }
          distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16);
          rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC,
                                // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0,
                                &ta0, &tl0,
                                TX_4X4);
        }
        rate += rate_t;
      }

      distortion >>= 2;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = rate_t;
        *bestdistortion = distortion;
        besta0 = ta0;
        besta1 = ta1;
        bestl0 = tl0;
        bestl1 = tl1;
        best_rd = this_rd;
        *best_mode = mode;
#if CONFIG_COMP_INTRA_PRED
        *best_second_mode = mode2;
#endif
        copy_predictor_8x8(best_predictor, b->predictor);
        vpx_memcpy(best_dqcoeff, b->dqcoeff, 64);
        vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64);
#if CONFIG_COMP_INTRA_PRED
      }
#endif
    }
  }
  b->bmi.as_mode.first = (*best_mode);
#if CONFIG_COMP_INTRA_PRED
  b->bmi.as_mode.second = (*best_second_mode);
#endif

  vp9_encode_intra8x8(x, ib);

  // Commit the winning candidate's entropy contexts.
  if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
    a[vp9_block2above[TX_8X8][idx]] = besta0;
    a[vp9_block2above[TX_8X8][idx] + 1] = besta1;
    l[vp9_block2left[TX_8X8][idx]] = bestl0;
    l[vp9_block2left[TX_8X8][idx] + 1] = bestl1;
  } else {
    a[vp9_block2above[TX_4X4][ib]] = besta0;
    a[vp9_block2above[TX_4X4][ib + 1]] = besta1;
    l[vp9_block2left[TX_4X4][ib]] = bestl0;
    l[vp9_block2left[TX_4X4][ib + 4]] = bestl1;
  }

  return best_rd;
}
// RD-search intra 8x8 modes for all four 8x8 sub-blocks of a macroblock.
// Accumulates the per-sub-block rate/distortion, stores the chosen mode for
// each, and returns the combined RD cost.
// NOTE(review): the best_rd parameter is currently unused here (no early
// abort, unlike the 4x4 variant) — confirm whether that is intentional.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; also changed 'long long' to int64_t to match the rest
// of the file (identical type on supported platforms).
static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
                                         int *Distortion, int64_t best_rd) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int i, ib;
  int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED];
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;
  int *i8x8mode_costs;

  vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  xd->mode_info_context->mbmi.mode = I8X8_PRED;
  i8x8mode_costs = mb->i8x8_mode_costs;

  for (i = 0; i < 4; i++) {
    MODE_INFO *const mic = xd->mode_info_context;
    B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
#if CONFIG_COMP_INTRA_PRED
    B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode);
#endif
    int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);

    ib = vp9_i8x8_block[i];
    total_rd += rd_pick_intra8x8block(
        cpi, mb, ib, &best_mode,
#if CONFIG_COMP_INTRA_PRED
        & best_second_mode,
#endif
        i8x8mode_costs, ta, tl, &r, &ry, &d);
    cost += r;
    distortion += d;
    tot_rate_y += ry;
    mic->bmi[ib].as_mode.first = best_mode;
#if CONFIG_COMP_INTRA_PRED
    mic->bmi[ib].as_mode.second = best_second_mode;
#endif
  }

  *Rate = cost;
  *rate_y = tot_rate_y;
  *Distortion = distortion;
  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
// Token cost of the eight 4x4 chroma blocks (indices 16..23) of a
// macroblock. When backup is non-zero, costing runs against local copies of
// the entropy contexts so the live contexts are left untouched; otherwise
// the live contexts are updated in place.
static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;
  int cost = 0;
  int b;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  for (b = 16; b < 24; b++) {
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
                        ta + vp9_block2above[TX_4X4][b],
                        tl + vp9_block2left[TX_4X4][b],
                        TX_4X4);
  }

  return cost;
}
// Inter 16x16 chroma RD cost using the 4x4 transform: transform and
// quantize the UV residual (already computed by the caller), then report
// rate, distortion (vp9_mbuverror scaled by 1/4) and skippability.
// Returns the combined RD cost.
// NOTE: removed a trailing garbled line-number artifact left by a bad
// extraction; no code behavior is changed.
static int64_t rd_inter16x16_uv_4x4(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                    int *distortion, int fullpixel, int *skip,
                                    int do_ctx_backup) {
  vp9_transform_mbuv_4x4(x);
  vp9_quantize_mbuv_4x4(x);

  *rate = rd_cost_mbuv_4x4(x, do_ctx_backup);
  *distortion = vp9_mbuverror(x) / 4;
  *skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd);

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
// Token cost of the two 8x8 chroma blocks (U at index 16, V at 20). When
// backup is non-zero, costing runs against local copies of the entropy
// contexts; otherwise the live contexts are updated in place.
// Consistency fix: the no-backup branch now uses the xd alias like
// rd_cost_mbuv_4x4 does (identical pointers as mb->e_mbd.*).
static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) {
  int b;
  int cost = 0;
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  for (b = 16; b < 24; b += 4)
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b], TX_8X8);

  return cost;
}
// Inter 16x16 chroma RD cost using the 8x8 transform; 8x8 analogue of
// rd_inter16x16_uv_4x4. Returns the combined RD cost.
static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                    int *distortion, int fullpixel, int *skip,
                                    int do_ctx_backup) {
  int64_t rd;

  vp9_transform_mbuv_8x8(x);
  vp9_quantize_mbuv_8x8(x);

  // These three outputs are independent of each other.
  *skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd);
  *distortion = vp9_mbuverror(x) / 4;
  *rate = rd_cost_mbuv_8x8(x, do_ctx_backup);

  rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
  return rd;
}
#if CONFIG_TX32X32
// Token cost of the 16x16 chroma transforms of a 32x32 superblock. When
// backup is non-zero, costing runs against local context copies.
// NOTE(review): the context offsets use the TX_8X8 block maps while the
// cost is evaluated at TX_16X16 — presumably intentional; confirm against
// cost_coeffs().
static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;
  int cost = 0;
  int b;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *) &t_above;
    tl = (ENTROPY_CONTEXT *) &t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  for (b = 16; b < 24; b += 4) {
    cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV,
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b], TX_16X16);
  }

  return cost;
}
// 32x32 chroma RD stats using the 16x16 transform. The residual subtraction
// is done by the caller; this transforms/quantizes the superblock UV data
// and reports rate, distortion (block error, scaled by >> 2) and
// skippability. The +1024 offsets index past the luma portion of the
// superblock coefficient buffers.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; no code behavior is changed.
static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate,
                                   int *distortion, int *skip,
                                   int backup) {
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_transform_sbuv_16x16(x);
  vp9_quantize_sbuv_16x16(x);

  *rate = rd_cost_sbuv_16x16(x, backup);
  *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024,
                                  xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2;
  *skip = vp9_sbuv_is_skippable_16x16(xd);
}
#endif
// Chroma RD cost for a 32x32 superblock. Under TX_32X32 the whole UV plane
// is handled in one 16x16-transform pass; otherwise the four 16x16 sub-MBs
// are processed individually (4x4 or 8x8 transform) with per-quadrant
// entropy contexts. Returns the combined RD cost.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; no code behavior is changed.
static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                int *distortion, int fullpixel, int *skip) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
  const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;

#if CONFIG_TX32X32
  if (mbmi->txfm_size == TX_32X32) {
    vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
                          usrc, vsrc, src_uv_stride,
                          udst, vdst, dst_uv_stride);
    rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1);
  } else
#endif
  {
    int n, r = 0, d = 0;
    int skippable = 1;
    ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
    ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
    ENTROPY_CONTEXT_PLANES *tl = xd->left_context;

    // Snapshot the entry contexts; the per-quadrant costing below updates
    // the live contexts in place.
    memcpy(t_above, xd->above_context, sizeof(t_above));
    memcpy(t_left, xd->left_context, sizeof(t_left));

    for (n = 0; n < 4; n++) {
      int x_idx = n & 1, y_idx = n >> 1;
      int d_tmp, s_tmp, r_tmp;

      xd->above_context = ta + x_idx;
      xd->left_context = tl + y_idx;
      vp9_subtract_mbuv_s_c(x->src_diff,
                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
                            src_uv_stride,
                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                            dst_uv_stride);

      if (mbmi->txfm_size == TX_4X4) {
        rd_inter16x16_uv_4x4(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
      } else {
        rd_inter16x16_uv_8x8(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
      }

      r += r_tmp;
      d += d_tmp;
      skippable = skippable && s_tmp;
    }

    *rate = r;
    *distortion = d;
    *skip = skippable;

    // Restore the original context pointers and re-seed the live contexts
    // from the snapshots taken on entry.
    xd->left_context = tl;
    xd->above_context = ta;
    memcpy(xd->above_context, t_above, sizeof(t_above));
    memcpy(xd->left_context, t_left, sizeof(t_left));
  }

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
#if CONFIG_SUPERBLOCKS64
static void super_block_64_uvrd(MACROBLOCK *x, int *rate,
int *distortion, int *skip);
// Chroma RD cost for a 64x64 superblock: defer the rate/distortion/skip
// computation to the shared helper, then combine into an RD cost.
static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                int *distortion, int fullpixel, int *skip) {
  int64_t rd;

  super_block_64_uvrd(x, rate, distortion, skip);
  rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);

  return rd;
}
#endif // CONFIG_SUPERBLOCKS64
// Chroma RD cost for 4x4-partitioned inter prediction: build the UV
// prediction, form the residual, then reuse the 16x16/4x4 UV costing.
static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                              int *distortion, int *skip, int fullpixel) {
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_build_inter4x4_predictors_mbuv(xd);
  vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    xd->predictor, x->src.uv_stride);

  return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1);
}
// Exhaustive RD search over chroma (UV) intra modes for a macroblock using
// the 4x4 transform. Outputs the winning rate, token-only rate, distortion
// and skip flag; leaves the winning mode in mbmi->uv_mode.
// The preprocessor bracing below is deliberately "unbalanced" per-branch:
// the closing brace of the best-candidate if-block sits inside the
// CONFIG_COMP_INTRA_PRED guard so the braces match in both configurations.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; no code behavior is changed.
static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi,
                                    MACROBLOCK *x,
                                    int *rate,
                                    int *rate_tokenonly,
                                    int *distortion,
                                    int *skippable) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
#if CONFIG_COMP_INTRA_PRED
  MB_PREDICTION_MODE mode2;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected);
#endif
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
  int64_t best_rd = LLONG_MAX;
  int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
  int rate_to, UNINITIALIZED_IS_SAFE(skip);

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
#if CONFIG_COMP_INTRA_PRED
    for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
#endif
      int rate;
      int distortion;
      int64_t this_rd;

      mbmi->uv_mode = mode;
#if CONFIG_COMP_INTRA_PRED
      mbmi->second_uv_mode = mode2;
      if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
#endif
        vp9_build_intra_predictors_mbuv(&x->e_mbd);
#if CONFIG_COMP_INTRA_PRED
      } else {
        continue;
        vp9_build_comp_intra_predictors_mbuv(&x->e_mbd);
      }
#endif

      vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                        x->e_mbd.predictor, x->src.uv_stride);
      vp9_transform_mbuv_4x4(x);
      vp9_quantize_mbuv_4x4(x);

      rate_to = rd_cost_mbuv_4x4(x, 1);
      rate = rate_to
             + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];

      distortion = vp9_mbuverror(x) / 4;

      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        skip = vp9_mbuv_is_skippable_4x4(xd);
        best_rd = this_rd;
        d = distortion;
        r = rate;
        *rate_tokenonly = rate_to;
        mode_selected = mode;
#if CONFIG_COMP_INTRA_PRED
        mode2_selected = mode2;
      }
#endif
    }
  }

  *rate = r;
  *distortion = d;
  *skippable = skip;

  mbmi->uv_mode = mode_selected;
#if CONFIG_COMP_INTRA_PRED
  mbmi->second_uv_mode = mode2_selected;
#endif
}
// Exhaustive RD search over chroma (UV) intra modes for a macroblock using
// the 8x8 transform; 8x8 analogue of rd_pick_intra_mbuv_mode.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; no code behavior is changed.
static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi,
                                        MACROBLOCK *x,
                                        int *rate,
                                        int *rate_tokenonly,
                                        int *distortion,
                                        int *skippable) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
  int64_t best_rd = LLONG_MAX;
  int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
  int rate_to, UNINITIALIZED_IS_SAFE(skip);

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int rate;
    int distortion;
    int64_t this_rd;

    mbmi->uv_mode = mode;
    vp9_build_intra_predictors_mbuv(&x->e_mbd);

    vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                      x->e_mbd.predictor, x->src.uv_stride);
    vp9_transform_mbuv_8x8(x);
    vp9_quantize_mbuv_8x8(x);

    rate_to = rd_cost_mbuv_8x8(x, 1);
    rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];

    distortion = vp9_mbuverror(x) / 4;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      skip = vp9_mbuv_is_skippable_8x8(xd);
      best_rd = this_rd;
      d = distortion;
      r = rate;
      *rate_tokenonly = rate_to;
      mode_selected = mode;
    }
  }

  *rate = r;
  *distortion = d;
  *skippable = skip;

  mbmi->uv_mode = mode_selected;
}
// TODO(rbultje) very similar to rd_inter32x32_uv(), merge?
// Chroma RD stats for a 32x32 superblock (no RDCOST combining): either one
// 16x16-transform pass (TX_32X32) or four per-MB passes with 4x4/8x8
// transforms and quadrant-local entropy contexts.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; no code behavior is changed.
static void super_block_uvrd(MACROBLOCK *x,
                             int *rate,
                             int *distortion,
                             int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
  const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;

#if CONFIG_TX32X32
  if (mbmi->txfm_size == TX_32X32) {
    vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
                          usrc, vsrc, src_uv_stride,
                          udst, vdst, dst_uv_stride);
    rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1);
  } else
#endif
  {
    int d = 0, r = 0, n, s = 1;
    ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
    ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context;
    ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;

    // Snapshot contexts; per-quadrant costing works on these copies.
    memcpy(t_above, xd->above_context, sizeof(t_above));
    memcpy(t_left, xd->left_context, sizeof(t_left));

    for (n = 0; n < 4; n++) {
      int x_idx = n & 1, y_idx = n >> 1;

      vp9_subtract_mbuv_s_c(x->src_diff,
                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
                            src_uv_stride,
                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                            dst_uv_stride);

      if (mbmi->txfm_size == TX_4X4) {
        vp9_transform_mbuv_4x4(x);
        vp9_quantize_mbuv_4x4(x);
        s &= vp9_mbuv_is_skippable_4x4(xd);
      } else {
        vp9_transform_mbuv_8x8(x);
        vp9_quantize_mbuv_8x8(x);
        s &= vp9_mbuv_is_skippable_8x8(xd);
      }

      d += vp9_mbuverror(x) >> 2;
      // Point the context pointers at this quadrant's snapshot slots.
      xd->above_context = t_above + x_idx;
      xd->left_context = t_left + y_idx;
      if (mbmi->txfm_size == TX_4X4) {
        r += rd_cost_mbuv_4x4(x, 0);
      } else {
        r += rd_cost_mbuv_8x8(x, 0);
      }
    }

    xd->above_context = ta_orig;
    xd->left_context = tl_orig;

    *distortion = d;
    *rate = r;
    *skippable = s;
  }
}
#if CONFIG_SUPERBLOCKS64
// Chroma RD stats for a 64x64 superblock: four 32x32-sub-block passes under
// TX_32X32, otherwise sixteen per-MB passes with 4x4/8x8 transforms.
// Entropy-context handling mirrors super_block_uvrd.
// NOTE: removed a garbled line-number artifact fused into this function by
// a bad extraction; no code behavior is changed.
static void super_block_64_uvrd(MACROBLOCK *x,
                                int *rate,
                                int *distortion,
                                int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
  const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
  ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
  ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context;
  ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;
  int d = 0, r = 0, n, s = 1;

  // Snapshot contexts; per-sub-block costing works on these copies.
  memcpy(t_above, xd->above_context, sizeof(t_above));
  memcpy(t_left, xd->left_context, sizeof(t_left));

#if CONFIG_TX32X32
  if (mbmi->txfm_size == TX_32X32) {
    int n;

    *rate = 0;
    for (n = 0; n < 4; n++) {
      int x_idx = n & 1, y_idx = n >> 1;
      int r_tmp, d_tmp, s_tmp;

      vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
                            usrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
                            vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
                            src_uv_stride,
                            udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
                            vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
                            dst_uv_stride);
      xd->above_context = t_above + x_idx * 2;
      xd->left_context = t_left + y_idx * 2;
      rd_inter32x32_uv_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
      r += r_tmp;
      d += d_tmp;
      s = s && s_tmp;
    }
  } else
#endif
  {
    for (n = 0; n < 16; n++) {
      int x_idx = n & 3, y_idx = n >> 2;

      vp9_subtract_mbuv_s_c(x->src_diff,
                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
                            src_uv_stride,
                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                            dst_uv_stride);

      if (mbmi->txfm_size == TX_4X4) {
        vp9_transform_mbuv_4x4(x);
        vp9_quantize_mbuv_4x4(x);
        s &= vp9_mbuv_is_skippable_4x4(xd);
      } else {
        vp9_transform_mbuv_8x8(x);
        vp9_quantize_mbuv_8x8(x);
        s &= vp9_mbuv_is_skippable_8x8(xd);
      }

      xd->above_context = t_above + x_idx;
      xd->left_context = t_left + y_idx;

      d += vp9_mbuverror(x) >> 2;
      if (mbmi->txfm_size == TX_4X4) {
        r += rd_cost_mbuv_4x4(x, 0);
      } else {
        r += rd_cost_mbuv_8x8(x, 0);
      }
    }
  }

  *distortion = d;
  *rate = r;
  *skippable = s;

  xd->left_context = tl_orig;
  xd->above_context = ta_orig;
}
#endif // CONFIG_SUPERBLOCKS64
// Searches every chroma intra prediction mode (DC_PRED..TM_PRED) for a
// 32x32 superblock and keeps the one with minimum RD cost. On return
// mbmi->uv_mode holds the winner; *rate (including mode-signalling cost),
// *rate_tokenonly, *distortion and *skippable describe it. Returns the
// best RD cost found.
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int *rate,
                                       int *rate_tokenonly,
                                       int *distortion,
                                       int *skippable) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  // INT64_MAX rather than LLONG_MAX: matches the int64_t type and the
  // rd_pick_intra_sb64uv_mode() counterpart.
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate;
  int this_distortion, s;

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
    vp9_build_intra_predictors_sbuv_s(&x->e_mbd);

    super_block_uvrd(x, &this_rate_tokenonly,
                     &this_distortion, &s);
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];

    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;

  return best_rd;
}
#if CONFIG_SUPERBLOCKS64
// 64x64-superblock counterpart of rd_pick_intra_sbuv_mode(): evaluates all
// chroma intra modes (DC_PRED..TM_PRED), keeps the RD-best one in
// mbmi->uv_mode, writes its rate/distortion/skippability through the out
// parameters and returns its RD cost.
static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi,
                                         MACROBLOCK *x,
                                         int *rate,
                                         int *rate_tokenonly,
                                         int *distortion,
                                         int *skippable) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate;
  int this_distortion, s;

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
    vp9_build_intra_predictors_sb64uv_s(&x->e_mbd);

    super_block_64_uvrd(x, &this_rate_tokenonly,
                        &this_distortion, &s);
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;

  return best_rd;
}
#endif  // CONFIG_SUPERBLOCKS64
// Returns the bit cost of signalling inter mode |m| given |mode_context|.
// When the mode is coded entirely at the segment level (SEG_LVL_MODE
// active) there is no per-MB cost, so 0 is returned.
int vp9_cost_mv_ref(VP9_COMP *cpi,
                    MB_PREDICTION_MODE m,
                    const int mode_context) {
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
  VP9_COMMON *const pc = &cpi->common;
  vp9_prob p[VP9_MVREFS - 1];

  // If the mode coding is done entirely at the segment level
  // we should not account for it at the per mb level in rd code.
  // Note that if the segment level coding is expanded from single mode
  // to multiple mode masks as per reference frame coding we will need
  // to do something different here.
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE))
    return 0;

  assert(NEARESTMV <= m && m <= SPLITMV);
  vp9_mv_ref_probs(pc, p, mode_context);
  return cost_token(vp9_mv_ref_tree, p,
                    vp9_mv_ref_encoding_array - NEARESTMV + m);
}
// Stores prediction mode |mb| and first motion vector |mv| into the
// macroblock's mode info.
void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mode_info_context->mbmi;

  mbmi->mode = mb;
  mbmi->mv[0].as_int = mv->as_int;
}
// Assigns prediction mode |this_mode| and motion vector(s) to every 4x4
// block carrying |which_label| in |labelings|, writing the result into both
// the BLOCKD array and the partition info. Blocks whose MV matches their
// left/above neighbour are downgraded to LEFT4X4/ABOVE4X4 so no redundant
// cost is incurred. Returns the bit cost of coding the mode plus, for
// NEW4X4, the motion vector(s).
static int labels2mode(
    MACROBLOCK *x,
    int const *labelings, int which_label,
    B_PREDICTION_MODE this_mode,
    int_mv *this_mv, int_mv *this_second_mv,
    int_mv seg_mvs[MAX_REF_FRAMES - 1],
    int_mv *best_ref_mv,
    int_mv *second_best_ref_mv,
    int *mvjcost, int *mvcost[2]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mode_info_context;
  MB_MODE_INFO *mbmi = &mic->mbmi;
  const int mis = xd->mode_info_stride;
  int i, cost = 0, thismvcost = 0;

  /* We have to be careful retrieving previously-encoded motion vectors.
     Ones from this macroblock have to be pulled from the BLOCKD array
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
  for (i = 0; i < 16; ++i) {
    BLOCKD *const d = xd->block + i;
    const int row = i >> 2, col = i & 3;

    B_PREDICTION_MODE m;

    if (labelings[i] != which_label)
      continue;

    if (col && labelings[i] == labelings[i - 1])
      m = LEFT4X4;
    else if (row && labelings[i] == labelings[i - 4])
      m = ABOVE4X4;
    else {
      // the only time we should do costing for new motion vector or mode
      // is when we are on a new label (jbb May 08, 2007)
      switch (m = this_mode) {
        case NEW4X4 :
          if (mbmi->second_ref_frame > 0) {
            this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int;
            this_second_mv->as_int =
                seg_mvs[mbmi->second_ref_frame - 1].as_int;
          }

          thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
                                       102, xd->allow_high_precision_mv);
          if (mbmi->second_ref_frame > 0) {
            thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
                                          mvjcost, mvcost, 102,
                                          xd->allow_high_precision_mv);
          }
          break;
        case LEFT4X4:
          this_mv->as_int = col ? d[-1].bmi.as_mv.first.as_int :
                                  left_block_mv(mic, i);
          if (mbmi->second_ref_frame > 0)
            this_second_mv->as_int =
                col ? d[-1].bmi.as_mv.second.as_int :
                      left_block_second_mv(mic, i);
          break;
        case ABOVE4X4:
          this_mv->as_int = row ? d[-4].bmi.as_mv.first.as_int :
                                  above_block_mv(mic, i, mis);
          if (mbmi->second_ref_frame > 0)
            this_second_mv->as_int =
                row ? d[-4].bmi.as_mv.second.as_int :
                      above_block_second_mv(mic, i, mis);
          break;
        case ZERO4X4:
          this_mv->as_int = 0;
          if (mbmi->second_ref_frame > 0)
            this_second_mv->as_int = 0;
          break;
        default:
          break;
      }

      if (m == ABOVE4X4) {  // replace above with left if same
        int_mv left_mv, left_second_mv;

        left_second_mv.as_int = 0;
        left_mv.as_int = col ? d[-1].bmi.as_mv.first.as_int :
                               left_block_mv(mic, i);
        if (mbmi->second_ref_frame > 0)
          left_second_mv.as_int = col ? d[-1].bmi.as_mv.second.as_int :
                                        left_block_second_mv(mic, i);

        if (left_mv.as_int == this_mv->as_int &&
            (mbmi->second_ref_frame <= 0 ||
             left_second_mv.as_int == this_second_mv->as_int))
          m = LEFT4X4;
      }

#if CONFIG_NEWBINTRAMODES
      cost = x->inter_bmode_costs[
          m == B_CONTEXT_PRED ? m - CONTEXT_PRED_REPLACEMENTS : m];
#else
      cost = x->inter_bmode_costs[m];
#endif
    }

    d->bmi.as_mv.first.as_int = this_mv->as_int;
    if (mbmi->second_ref_frame > 0)
      d->bmi.as_mv.second.as_int = this_second_mv->as_int;

    x->partition_info->bmi[i].mode = m;
    x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
    if (mbmi->second_ref_frame > 0)
      x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
  }

  cost += thismvcost;
  return cost;
}
// Encodes (predict/subtract/transform/quantize) every 4x4 block of the
// macroblock carrying |which_label| in |labels|, accumulating the token
// rate into *labelyrate and the coefficient error into *distortion
// (scaled down by 2 bits at the end). |ta|/|tl| are the entropy contexts
// updated in place by the coefficient costing. Returns the RD cost of
// this label's blocks.
static int64_t encode_inter_mb_segment(MACROBLOCK *x,
                                       int const *labels,
                                       int which_label,
                                       int *labelyrate,
                                       int *distortion,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;

  *labelyrate = 0;
  *distortion = 0;
  for (i = 0; i < 16; i++) {
    if (labels[i] == which_label) {
      BLOCKD *bd = &x->e_mbd.block[i];
      BLOCK *be = &x->block[i];
      int thisdistortion;

      // Single (plus optional compound) inter prediction for this 4x4.
      vp9_build_inter_predictors_b(bd, 16, xd->subpixel_predict4x4);
      if (xd->mode_info_context->mbmi.second_ref_frame > 0)
        vp9_build_2nd_inter_predictors_b(bd, 16, xd->subpixel_predict_avg4x4);
      vp9_subtract_b(be, bd, 16);
      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
      x->quantize_b_4x4(be, bd);
      thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
      *distortion += thisdistortion;
      *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
                                 ta + vp9_block2above[TX_4X4][i],
                                 tl + vp9_block2left[TX_4X4][i], TX_4X4);
    }
  }
  // Distortion is kept at 1/4 scale, consistent with the other RD paths.
  *distortion >>= 2;
  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
// Like encode_inter_mb_segment(), but for SPLITMV partitionings whose
// segments are 8x8 or larger. Each 8x8 sub-block is coded with either four
// 4x4 transforms or one 8x8 transform, according to mbmi->txfm_size. When
// |otherrd| is non-NULL the *other* transform size is additionally costed
// on private copies of the entropy contexts (tac/tlc) and its RD cost
// returned through *otherrd, letting the caller pick the better size.
static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
                                           int const *labels,
                                           int which_label,
                                           int *labelyrate,
                                           int *distortion,
                                           int64_t *otherrd,
                                           ENTROPY_CONTEXT *ta,
                                           ENTROPY_CONTEXT *tl) {
  int i, j;
  MACROBLOCKD *xd = &x->e_mbd;
  const int iblock[4] = { 0, 1, 4, 5 };
  int othercost = 0, otherdist = 0;
  ENTROPY_CONTEXT_PLANES tac, tlc;
  ENTROPY_CONTEXT *tacp = (ENTROPY_CONTEXT *) &tac,
                  *tlcp = (ENTROPY_CONTEXT *) &tlc;

  if (otherrd) {
    // Private context copies for costing the alternative transform size.
    memcpy(&tac, ta, sizeof(ENTROPY_CONTEXT_PLANES));
    memcpy(&tlc, tl, sizeof(ENTROPY_CONTEXT_PLANES));
  }

  *distortion = 0;
  *labelyrate = 0;
  for (i = 0; i < 4; i++) {
    int ib = vp9_i8x8_block[i];

    if (labels[ib] == which_label) {
      int idx = (ib & 8) + ((ib & 2) << 1);
      BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
      BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
      int thisdistortion;

      vp9_build_inter_predictors4b(xd, bd, 16);
      if (xd->mode_info_context->mbmi.second_ref_frame > 0)
        vp9_build_2nd_inter_predictors4b(xd, bd, 16);
      vp9_subtract_4b_c(be, bd, 16);

      if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
        if (otherrd) {
          // Cost the alternative 8x8 transform on the context copies.
          x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
          x->quantize_b_8x8(be2, bd2);
          thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
          otherdist += thisdistortion;
          othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
                                   tacp + vp9_block2above[TX_8X8][idx],
                                   tlcp + vp9_block2left[TX_8X8][idx],
                                   TX_8X8);
        }
        // Two 8x4 fdct passes cover the four 4x4 blocks of this 8x8.
        for (j = 0; j < 4; j += 2) {
          bd = &xd->block[ib + iblock[j]];
          be = &x->block[ib + iblock[j]];
          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
          x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
          thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
          *distortion += thisdistortion;
          *labelyrate +=
              cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
                          ta + vp9_block2above[TX_4X4][ib + iblock[j]],
                          tl + vp9_block2left[TX_4X4][ib + iblock[j]],
                          TX_4X4);
          // The right-hand 4x4 shares the left context row (+1 column).
          *labelyrate +=
              cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
                          ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
                          tl + vp9_block2left[TX_4X4][ib + iblock[j]],
                          TX_4X4);
        }
      } else /* 8x8 */ {
        if (otherrd) {
          // Cost the alternative 4x4 transforms on the context copies.
          for (j = 0; j < 4; j += 2) {
            BLOCKD *bd = &xd->block[ib + iblock[j]];
            BLOCK *be = &x->block[ib + iblock[j]];
            x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
            x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
            thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
            otherdist += thisdistortion;
            othercost +=
                cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
                            tacp + vp9_block2above[TX_4X4][ib + iblock[j]],
                            tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
                            TX_4X4);
            othercost +=
                cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
                            tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
                            tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
                            TX_4X4);
          }
        }
        x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
        x->quantize_b_8x8(be2, bd2);
        thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
        *distortion += thisdistortion;
        *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
                                   ta + vp9_block2above[TX_8X8][idx],
                                   tl + vp9_block2left[TX_8X8][idx], TX_8X8);
      }
    }
  }
  // Distortion kept at 1/4 scale, consistent with the other RD paths.
  *distortion >>= 2;
  if (otherrd) {
    otherdist >>= 2;
    *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist);
  }
  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
// Right-shift applied to a segment's motion-search score before comparing
// against the full-search threshold, indexed by SPLITMV partitioning type.
// NOTE(review): presumably normalizes the score by segment size -- confirm
// against the (bestsme >> sseshift) use in rd_check_segment_txsize().
static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};

// Book-keeping carried across the SPLITMV segmentation search; holds the
// best segmentation found so far and per-block results for it.
typedef struct {
  int_mv *ref_mv, *second_ref_mv;  // reference MVs used for MV bit costing
  int_mv mvp;                      // motion vector predictor for the search
  int64_t segment_rd;              // RD cost threshold / best-so-far
  SPLITMV_PARTITIONING_TYPE segment_num;
  TX_SIZE txfm_size;
  int r;
  int d;
  int segment_yrate;
  B_PREDICTION_MODE modes[16];     // per-4x4-block modes of the best split
  int_mv mvs[16], second_mvs[16];  // per-block MVs (second for compound)
  int eobs[16];
  int mvthresh;                    // threshold gating new-MV searches
  int *mdcounts;

  int_mv sv_mvp[4];  // save 4 mvp from 8x8
  int sv_istep[2];   // save 2 initial step_param for 16x8/8x16
} BEST_SEG_INFO;
// Returns nonzero when the full-pel position of |mv| lies outside the
// legal motion-vector range recorded in |x|.
static __inline
int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
  const int row = mv->as_mv.row >> 3;
  const int col = mv->as_mv.col >> 3;

  return (row < x->mv_row_min) | (row > x->mv_row_max) |
         (col < x->mv_col_min) | (col > x->mv_col_max);
}
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi,
SPLITMV_PARTITIONING_TYPE segmentation,
TX_SIZE tx_size, int64_t *otherrds,
int64_t *rds, int *completed,
/* 16 = n_blocks */
int_mv seg_mvs[16 /* n_blocks */]
[MAX_REF_FRAMES - 1]) {
int i, j;
int const *labels;
int br = 0, bd = 0;
B_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
int label_count;
int64_t this_segment_rd = 0, other_segment_rd;
int label_mv_thresh;
int rate = 0;
int sbr = 0, sbd = 0;
int segmentyrate = 0;
int best_eobs[16] = { 0 };
vp9_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta, *tl;
ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
ENTROPY_CONTEXT *ta_b, *tl_b;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
ta_b = (ENTROPY_CONTEXT *)&t_above_b;
tl_b = (ENTROPY_CONTEXT *)&t_left_b;
v_fn_ptr = &cpi->fn_ptr[segmentation];
labels = vp9_mbsplits[segmentation];
label_count = vp9_mbsplit_count[segmentation];
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
// segments. setting this to 1 would make mv thresh
// roughly equal to what it is for macroblocks
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
rate = cost_token(vp9_mbsplit_tree, vp9_mbsplit_probs,
vp9_mbsplit_encodings + segmentation);
rate += vp9_cost_mv_ref(cpi, SPLITMV,
mbmi->mb_mode_context[mbmi->ref_frame]);
this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
br += rate;
other_segment_rd = this_segment_rd;
mbmi->txfm_size = tx_size;
for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {
int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
int64_t best_label_rd = LLONG_MAX, best_other_rd = LLONG_MAX;
B_PREDICTION_MODE mode_selected = ZERO4X4;
int bestlabelyrate = 0;
// search for the best motion vector on this segment
for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
int64_t this_rd, other_rd;
int distortion;
int labelyrate;
ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
ENTROPY_CONTEXT *ta_s;
ENTROPY_CONTEXT *tl_s;
vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
ta_s = (ENTROPY_CONTEXT *)&t_above_s;
tl_s = (ENTROPY_CONTEXT *)&t_left_s;
// motion search for newmv (single predictor case only)
if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) {
int sseshift, n;
int step_param = 0;
int further_steps;
int thissme, bestsme = INT_MAX;
BLOCK *c;
BLOCKD *e;
/* Is the best so far sufficiently good that we cant justify doing
* and new motion search. */
if (best_label_rd < label_mv_thresh)
break;
if (cpi->compressor_speed) {
if (segmentation == PARTITIONING_8X16 ||
segmentation == PARTITIONING_16X8) {
bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
if (i == 1 && segmentation == PARTITIONING_16X8)
bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
step_param = bsi->sv_istep[i];
}
// use previous block's result as next block's MV predictor.
if (segmentation == PARTITIONING_4X4 && i > 0) {
bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int;
if (i == 4 || i == 8 || i == 12)
bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int;
step_param = 2;
}
}
further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
{
int sadpb = x->sadperbit4;
int_mv mvp_full;
mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
// find first label
n = vp9_mbsplit_offset[segmentation][i];
c = &x->block[n];
e = &x->e_mbd.block[n];
bestsme = vp9_full_pixel_diamond(cpi, x, c, e, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
bsi->ref_mv, &mode_mv[NEW4X4]);
sseshift = segmentation_to_sseshift[segmentation];
// Should we do a full search (best quality only)
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
/* Check if mvp_full is within the range. */
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, c, e, &mvp_full,
sadpb, 16, v_fn_ptr,
x->nmvjointcost, x->mvcost,
bsi->ref_mv);
if (thissme < bestsme) {
bestsme = thissme;
mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int;
} else {
/* The full search result is actually worse so re-instate the
* previous best vector */
e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int;
}
}
}
if (bestsme < INT_MAX) {
int distortion;
unsigned int sse;
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
bsi->ref_mv, x->errorperbit, v_fn_ptr,
x->nmvjointcost, x->mvcost,
&distortion, &sse);
// safe motion search result for use in compound prediction
seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
}
} else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) {
/* NEW4X4 */
/* motion search not completed? Then skip newmv for this block with
* comppred */
if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
continue;
}
}
rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
&second_mode_mv[this_mode], seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost);
// Trap vectors that reach beyond the UMV borders
if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
continue;
}
if (mbmi->second_ref_frame > 0 &&
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
if (segmentation == PARTITIONING_4X4) {
this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate,
&distortion, ta_s, tl_s);
other_rd = this_rd;
} else {
this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate,
&distortion, &other_rd,
ta_s, tl_s);
}
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
if (this_rd < best_label_rd) {
sbr = rate;
sbd = distortion;
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
for (j = 0; j < 16; j++)
if (labels[j] == i)
best_eobs[j] = x->e_mbd.block[j].eob;
} else {
for (j = 0; j < 4; j++) {
int ib = vp9_i8x8_block[j], idx = j * 4;
if (labels[ib] == i)
best_eobs[idx] = x->e_mbd.block[idx].eob;
}
}
if (other_rd < best_other_rd)
best_other_rd = other_rd;
vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
}
} /*for each 4x4 mode*/