• Qt by Nokia's avatar
    Initial import from the monolithic Qt. · 41c8d3db
    Qt by Nokia authored
    This is the beginning of revision history for this module. If you
    want to look at revision history older than this, please refer to the
    Qt Git wiki for how to use Git history grafting. At the time of
    writing, this wiki is located here:
    
    http://qt.gitorious.org/qt/pages/GitIntroductionWithQt
    
    If you have already performed the grafting and you don't see any
    history beyond this commit, try running "git log" with the "--follow"
    argument.
    
    Branched from the monolithic repo, Qt master branch, at commit
    896db169ea224deb96c59ce8af800d019de63f12
    41c8d3db
vp9_rdopt.c 122.17 KiB
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
#define INVALID_MV 0x80008000
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
{D27_PRED, INTRA_FRAME, NONE}, {D63_PRED, INTRA_FRAME, NONE}, {TM_PRED, INTRA_FRAME, NONE}, {NEWMV, LAST_FRAME, NONE}, {NEWMV, GOLDEN_FRAME, NONE}, {NEWMV, ALTREF_FRAME, NONE}, {SPLITMV, LAST_FRAME, NONE}, {SPLITMV, GOLDEN_FRAME, NONE}, {SPLITMV, ALTREF_FRAME, NONE}, {I4X4_PRED, INTRA_FRAME, NONE}, /* compound prediction modes */ {ZEROMV, LAST_FRAME, GOLDEN_FRAME}, {NEARESTMV, LAST_FRAME, GOLDEN_FRAME}, {NEARMV, LAST_FRAME, GOLDEN_FRAME}, {ZEROMV, ALTREF_FRAME, LAST_FRAME}, {NEARESTMV, ALTREF_FRAME, LAST_FRAME}, {NEARMV, ALTREF_FRAME, LAST_FRAME}, {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME}, {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME}, {NEARMV, GOLDEN_FRAME, ALTREF_FRAME}, {NEWMV, LAST_FRAME, GOLDEN_FRAME}, {NEWMV, ALTREF_FRAME, LAST_FRAME }, {NEWMV, GOLDEN_FRAME, ALTREF_FRAME}, {SPLITMV, LAST_FRAME, GOLDEN_FRAME}, {SPLITMV, ALTREF_FRAME, LAST_FRAME }, {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME}, }; static void fill_token_costs(vp9_coeff_count *c, #if CONFIG_MODELCOEFPROB vp9_coeff_probs_model *p, #else vp9_coeff_probs *p, #endif TX_SIZE tx_size) { int i, j, k, l; for (i = 0; i < BLOCK_TYPES; i++) for (j = 0; j < REF_TYPES; j++) for (k = 0; k < COEF_BANDS; k++) for (l = 0; l < PREV_COEF_CONTEXTS; l++) { #if CONFIG_MODELCOEFPROB vp9_prob probs[ENTROPY_NODES]; vp9_model_to_full_probs(p[i][j][k][l], i, j, probs); vp9_cost_tokens_skip((int *)c[i][j][k][l], probs, vp9_coef_tree); #else vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l], vp9_coef_tree); #endif } } static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; // 3* dc_qlookup[Q]*dc_qlookup[Q]; /* values are now correlated to quantizer */
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
static int sad_per_bit16lut[QINDEX_RANGE]; static int sad_per_bit4lut[QINDEX_RANGE]; void vp9_init_me_luts() { int i; // Initialize the sad lut tables using a formulaic calculation for now // This is to make it easier to resolve the impact of experimental changes // to the quantizer tables. for (i = 0; i < QINDEX_RANGE; i++) { sad_per_bit16lut[i] = (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107); sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742); } } static int compute_rd_mult(int qindex) { const int q = vp9_dc_quant(qindex, 0); return (11 * q * q) >> 2; } void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { cpi->mb.sadperbit16 = sad_per_bit16lut[qindex]; cpi->mb.sadperbit4 = sad_per_bit4lut[qindex]; } void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { int q, i; vp9_clear_system_state(); // __asm emms; // Further tests required to see if optimum is different // for key frames, golden frames and arf frames. // if (cpi->common.refresh_golden_frame || // cpi->common.refresh_alt_ref_frame) qindex = clamp(qindex, 0, MAXQ); cpi->RDMULT = compute_rd_mult(qindex); if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { if (cpi->twopass.next_iiratio > 31) cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; else cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; } cpi->mb.errorperbit = cpi->RDMULT >> 6; cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); vp9_set_speed_features(cpi); q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25); q <<= 2; if (q < 8) q = 8; if (cpi->RDMULT > 1000) { cpi->RDDIV = 1; cpi->RDMULT /= 100; for (i = 0; i < MAX_MODES; i++) { if (cpi->sf.thresh_mult[i] < INT_MAX) { cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; } else { cpi->rd_threshes[i] = INT_MAX; } cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; } } else { cpi->RDDIV = 100;
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
for (i = 0; i < MAX_MODES; i++) { if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) { cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; } else { cpi->rd_threshes[i] = INT_MAX; } cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; } } fill_token_costs(cpi->mb.token_costs[TX_4X4], cpi->common.fc.coef_probs_4x4, TX_4X4); fill_token_costs(cpi->mb.token_costs[TX_8X8], cpi->common.fc.coef_probs_8x8, TX_8X8); fill_token_costs(cpi->mb.token_costs[TX_16X16], cpi->common.fc.coef_probs_16x16, TX_16X16); fill_token_costs(cpi->mb.token_costs[TX_32X32], cpi->common.fc.coef_probs_32x32, TX_32X32); for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) vp9_cost_tokens(cpi->mb.partition_cost[i], cpi->common.fc.partition_prob[i], vp9_partition_tree); /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; vp9_init_mode_costs(cpi); if (cpi->common.frame_type != KEY_FRAME) { vp9_build_nmv_cost_table( cpi->mb.nmvjointcost, cpi->mb.e_mbd.allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost, &cpi->common.fc.nmvc, cpi->mb.e_mbd.allow_high_precision_mv, 1, 1); } } int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { int i, error = 0; for (i = 0; i < block_size; i++) { int this_diff = coeff[i] - dqcoeff[i]; error += this_diff * this_diff; } return error; } static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int plane, int block, PLANE_TYPE type, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; int c = 0; int cost = 0, pad; const int *scan, *nb; const int eob = xd->plane[plane].eobs[block]; const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); const int ref = mbmi->ref_frame != INTRA_FRAME; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; ENTROPY_CONTEXT above_ec, left_ec; TX_TYPE tx_type = DCT_DCT;
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
const int segment_id = xd->mode_info_context->mbmi.segment_id; #if CONFIG_MODELCOEFPROB vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; #else vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][ENTROPY_NODES]; #endif int seg_eob, default_eob; uint8_t token_cache[1024]; const uint8_t * band_translate; // Check for consistency of tx_size with mode info assert((!type && !plane) || (type && plane)); if (type == PLANE_TYPE_Y_WITH_DC) { assert(xd->mode_info_context->mbmi.txfm_size == tx_size); } else { TX_SIZE tx_size_uv = get_uv_tx_size(xd); assert(tx_size == tx_size_uv); } switch (tx_size) { case TX_4X4: { tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_4x4(xd, block) : DCT_DCT; above_ec = A[0] != 0; left_ec = L[0] != 0; #if CONFIG_MODELCOEFPROB vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref], type, ref, coef_probs); #else coef_probs = cm->fc.coef_probs_4x4[type][ref]; #endif seg_eob = 16; scan = get_scan_4x4(tx_type); band_translate = vp9_coefband_trans_4x4; break; } case TX_8X8: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const int sz = 1 + b_width_log2(sb_type); const int x = block & ((1 << sz) - 1), y = block - x; TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; above_ec = (A[0] + A[1]) != 0; left_ec = (L[0] + L[1]) != 0; scan = get_scan_8x8(tx_type); #if CONFIG_MODELCOEFPROB vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref], type, ref, coef_probs); #else coef_probs = cm->fc.coef_probs_8x8[type][ref]; #endif seg_eob = 64; band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const int sz = 2 + b_width_log2(sb_type); const int x = block & ((1 << sz) - 1), y = block - x; TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; scan = get_scan_16x16(tx_type); #if CONFIG_MODELCOEFPROB vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref], type, ref, coef_probs); #else coef_probs = cm->fc.coef_probs_16x16[type][ref]; #endif seg_eob = 256;
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: scan = vp9_default_zig_zag1d_32x32; #if CONFIG_MODELCOEFPROB vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref], type, ref, coef_probs); #else coef_probs = cm->fc.coef_probs_32x32[type][ref]; #endif seg_eob = 1024; above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; band_translate = vp9_coefband_trans_8x8plus; break; default: abort(); break; } assert(eob <= seg_eob); pt = combine_entropy_contexts(above_ec, left_ec); nb = vp9_get_coef_neighbors_handle(scan, &pad); default_eob = seg_eob; if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; /* sanity check to ensure that we do not have spurious non-zero q values */ if (eob < seg_eob) assert(qcoeff_ptr[scan[eob]] == 0); { for (c = 0; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].token; int band = get_coef_band(band_translate, c); if (c) pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v]; if (!c || token_cache[scan[c - 1]]) cost += vp9_cost_bit(coef_probs[band][pt][0], 1); token_cache[scan[c]] = t; } if (c < seg_eob) { if (c) pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); cost += mb->token_costs[tx_size][type][ref] [get_coef_band(band_translate, c)] [pt][DCT_EOB_TOKEN]; } } // is eob first coefficient; for (pt = 0; pt < (1 << tx_size); pt++) { A[pt] = L[pt] = c > 0; } return cost; } static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int (*r)[2], int *rate, int *d, int *distortion, int *s, int *skip,
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
int64_t txfm_cache[NB_TXFM_MODES], TX_SIZE max_txfm_size) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP); int64_t rd[TX_SIZE_MAX_SB][2]; int n, m; int s0, s1; for (n = TX_4X4; n <= max_txfm_size; n++) { r[n][1] = r[n][0]; for (m = 0; m <= n - (n == max_txfm_size); m++) { if (m == n) r[n][1] += vp9_cost_zero(cm->prob_tx[m]); else r[n][1] += vp9_cost_one(cm->prob_tx[m]); } } assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); for (n = TX_4X4; n <= max_txfm_size; n++) { if (s[n]) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } } if (max_txfm_size == TX_32X32 && (cm->txfm_mode == ALLOW_32X32 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_32X32; } else if (max_txfm_size >= TX_16X16 && (cm->txfm_mode == ALLOW_16X16 || cm->txfm_mode == ALLOW_32X32 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode == ALLOW_8X8 || cm->txfm_mode == ALLOW_16X16 || cm->txfm_mode == ALLOW_32X32 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { mbmi->txfm_size = TX_8X8; } else { mbmi->txfm_size = TX_4X4; } *distortion = d[mbmi->txfm_size]; *rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT]; *skip = s[mbmi->txfm_size]; txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; if (max_txfm_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; else if (max_txfm_size >= TX_16X16 && rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
else txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? rd[TX_4X4][1] : rd[TX_8X8][1]; } static int block_error(int16_t *coeff, int16_t *dqcoeff, int block_size, int shift) { int i; int64_t error = 0; for (i = 0; i < block_size; i++) { int this_diff = coeff[i] - dqcoeff[i]; error += (unsigned)this_diff * this_diff; } error >>= shift; return error > INT_MAX ? INT_MAX : (int)error; } static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, 16 << (bwl + bhl), shift); } static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); int64_t sum = 0; int plane; for (plane = 1; plane < MAX_MB_PLANE; plane++) { const int subsampling = x->e_mbd.plane[plane].subsampling_x + x->e_mbd.plane[plane].subsampling_y; sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff, 16 << (bwl + bhl - subsampling), 0); } sum >>= shift; return sum > INT_MAX ? INT_MAX : (int)sum; } static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x, int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; const int bw = 1 << bwl, bh = 1 << bhl; ENTROPY_CONTEXT t_above[16], t_left[16]; int block, cost; vpx_memcpy(&t_above, xd->plane[plane].above_context, sizeof(ENTROPY_CONTEXT) * bw); vpx_memcpy(&t_left, xd->plane[plane].left_context, sizeof(ENTROPY_CONTEXT) * bh); cost = 0; for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) { int x_idx, y_idx; txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2, &x_idx, &y_idx); cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type, t_above + x_idx, t_left + y_idx, tx_size, bw * bh); } return cost; } static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { int cost = 0, plane; for (plane = 1; plane < MAX_MB_PLANE; plane++) { cost += rdcost_plane(cm, x, plane, bsize, tx_size); } return cost; } static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = tx_size; vp9_xform_quant_sby(cm, x, bsize); *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2); *rate = rdcost_plane(cm, x, 0, bsize, tx_size); *skippable = vp9_sby_is_skippable(xd, bsize); } static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int *skip, BLOCK_SIZE_TYPE bs, int64_t txfm_cache[NB_TXFM_MODES]) { VP9_COMMON *const cm = &cpi->common; int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB]; vp9_subtract_sby(x, bs); if (bs >= BLOCK_SIZE_SB32X32) super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], bs, TX_32X32); if (bs >= BLOCK_SIZE_MB16X16) super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], bs, TX_16X16); super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs, TX_8X8); super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs, TX_4X4); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, TX_32X32 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16)); } static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int *bestdistortion) { B_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; int64_t best_rd = INT64_MAX; int rate = 0; int distortion; VP9_COMMON *const cm = &cpi->common; const int src_stride = x->plane[0].src.stride; uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib, x->plane[0].src.buf, src_stride); int16_t* const src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, ib, x->plane[0].src_diff); int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700
uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib, xd->plane[0].dst.buf, xd->plane[0].dst.stride); ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; TX_TYPE tx_type = DCT_DCT; TX_TYPE best_tx_type = DCT_DCT; /* * The predictor buffer is a 2d buffer with a stride of 16. Create * a temp buffer that meets the stride requirements, but we are only * interested in the left 4x4 block * */ DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); assert(ib < 4); xd->mode_info_context->mbmi.txfm_size = TX_4X4; for (mode = B_DC_PRED; mode < LEFT4X4; mode++) { int64_t this_rd; int ratey; xd->mode_info_context->bmi[ib].as_mode.first = mode; rate = bmode_costs[mode]; vp9_intra4x4_predict(xd, ib, BLOCK_SIZE_SB8X8, mode, dst, xd->plane[0].dst.stride); vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, xd->plane[0].dst.stride); xd->mode_info_context->bmi[ib].as_mode.first = mode; tx_type = get_tx_type_4x4(xd, ib); if (tx_type != DCT_DCT) { vp9_short_fht4x4(src_diff, coeff, 8, tx_type); x->quantize_b_4x4(x, ib, tx_type, 16); } else { x->fwd_txm4x4(src_diff, coeff, 16); x->quantize_b_4x4(x, ib, tx_type, 16); } tempa = ta; templ = tl; ratey = cost_coeffs(cm, x, 0, ib, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16); rate += ratey; distortion = vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 16) >> 2; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; *bestratey = ratey; *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; best_tx_type = tx_type; *a = tempa; *l = templ; vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32); } } xd->mode_info_context->bmi[ib].as_mode.first = (B_PREDICTION_MODE)(*best_mode);
701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
vp9_intra4x4_predict(xd, ib, BLOCK_SIZE_SB8X8, *best_mode, dst, xd->plane[0].dst.stride); // inverse transform if (best_tx_type != DCT_DCT) { vp9_short_iht4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride, best_tx_type); } else { xd->inv_txm4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride); } return best_rd; } static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion, int64_t best_rd) { int i; MACROBLOCKD *const xd = &mb->e_mbd; #if CONFIG_AB4X4 int cost = 0; #else int cost = mb->mbmode_cost[xd->frame_type][I4X4_PRED]; #endif int distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[2], t_left[2]; int *bmode_costs; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); xd->mode_info_context->mbmi.mode = I4X4_PRED; bmode_costs = mb->inter_bmode_costs; for (i = 0; i < 4; i++) { const int x_idx = i & 1, y_idx = i >> 1; MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); if (xd->frame_type == KEY_FRAME) { const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); const B_PREDICTION_MODE L = left_block_mode(mic, i); bmode_costs = mb->bmode_costs[A][L]; } total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, t_above + x_idx, t_left + y_idx, &r, &ry, &d); cost += r; distortion += d; tot_rate_y += ry; mic->bmi[i].as_mode.first = best_mode; if (total_rd >= best_rd) break; } if (total_rd >= best_rd) return INT64_MAX; *Rate = cost; *rate_y = tot_rate_y;
771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840
*Distortion = distortion; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize, int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); int this_rate, this_rate_tokenonly; int this_distortion, s; int64_t best_rd = INT64_MAX, this_rd; TX_SIZE UNINITIALIZED_IS_SAFE(best_tx); int i; #if CONFIG_AB4X4 if (bsize < BLOCK_SIZE_SB8X8) { x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4; return best_rd; } #endif for (i = 0; i < NB_TXFM_MODES; i++) txfm_cache[i] = INT64_MAX; /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_txfm_cache[NB_TXFM_MODES]; x->e_mbd.mode_info_context->mbmi.mode = mode; vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, bsize, local_txfm_cache); this_rate = this_rate_tokenonly + x->mbmode_cost[x->e_mbd.frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; } for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd = this_rd + local_txfm_cache[i] - local_txfm_cache[cpi->common.txfm_mode]; if (adj_rd < txfm_cache[i]) { txfm_cache[i] = adj_rd; } } } x->e_mbd.mode_info_context->mbmi.mode = mode_selected; x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx; return best_rd; } static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize, TX_SIZE uv_tx_size) { MACROBLOCKD *const xd = &x->e_mbd;
841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910
vp9_xform_quant_sbuv(cm, x, bsize); *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2); *rate = rdcost_uv(cm, x, bsize, uv_tx_size); *skippable = vp9_sbuv_is_skippable(xd, bsize); } static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_subtract_sbuv(x, bsize); if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) { super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, TX_32X32); } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) { super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, TX_16X16); } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) { super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, TX_8X8); } else { super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, TX_4X4); } } static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate; int this_distortion, s; for (mode = DC_PRED; mode <= TM_PRED; mode++) { x->e_mbd.mode_info_context->mbmi.uv_mode = mode; vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, &this_distortion, &s, bsize); this_rate = this_rate_tokenonly + x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; } } x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; return best_rd; } int vp9_cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE m, const int mode_context) { MACROBLOCKD *xd = &cpi->mb.e_mbd; int segment_id = xd->mode_info_context->mbmi.segment_id;
911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980
// Dont account for mode here if segment skip is enabled. if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { VP9_COMMON *pc = &cpi->common; vp9_prob p [VP9_MVREFS - 1]; assert(NEARESTMV <= m && m <= SPLITMV); vp9_mv_ref_probs(pc, p, mode_context); return cost_token(vp9_mv_ref_tree, p, vp9_mv_ref_encoding_array - NEARESTMV + m); } else return 0; } void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { x->e_mbd.mode_info_context->mbmi.mode = mb; x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; } static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label, B_PREDICTION_MODE this_mode, int_mv *this_mv, int_mv *this_second_mv, int_mv seg_mvs[MAX_REF_FRAMES - 1], int_mv *best_ref_mv, int_mv *second_best_ref_mv, int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mode_info_context; MB_MODE_INFO * mbmi = &mic->mbmi; const int mis = xd->mode_info_stride; int i, cost = 0, thismvcost = 0; #if CONFIG_AB4X4 int idx, idy; int bw = 1 << b_width_log2(mbmi->sb_type); int bh = 1 << b_height_log2(mbmi->sb_type); #endif /* We have to be careful retrieving previously-encoded motion vectors. Ones from this macroblock have to be pulled from the BLOCKD array as they have not yet made it to the bmi array in our MB_MODE_INFO. */ for (i = 0; i < 4; ++i) { const int row = i >> 1, col = i & 1; B_PREDICTION_MODE m; if (labelings[i] != which_label) continue; if (col && labelings[i] == labelings[i - 1]) m = LEFT4X4; else if (row && labelings[i] == labelings[i - 2]) m = ABOVE4X4; else { // the only time we should do costing for new motion vector or mode // is when we are on a new label (jbb May 08, 2007) switch (m = this_mode) { case NEW4X4 : if (mbmi->second_ref_frame > 0) { this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int; this_second_mv->as_int = seg_mvs[mbmi->second_ref_frame - 1].as_int; } thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, 102, xd->allow_high_precision_mv); if (mbmi->second_ref_frame > 0) { thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, mvjcost, mvcost, 102, xd->allow_high_precision_mv); }
981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
break; case LEFT4X4: this_mv->as_int = col ? mic->bmi[i - 1].as_mv[0].as_int : left_block_mv(xd, mic, i); if (mbmi->second_ref_frame > 0) this_second_mv->as_int = col ? mic->bmi[i - 1].as_mv[1].as_int : left_block_second_mv(xd, mic, i); break; case ABOVE4X4: this_mv->as_int = row ? mic->bmi[i - 2].as_mv[0].as_int : above_block_mv(mic, i, mis); if (mbmi->second_ref_frame > 0) this_second_mv->as_int = row ? mic->bmi[i - 2].as_mv[1].as_int : above_block_second_mv(mic, i, mis); break; case ZERO4X4: this_mv->as_int = 0; if (mbmi->second_ref_frame > 0) this_second_mv->as_int = 0; break; default: break; } if (m == ABOVE4X4) { // replace above with left if same int_mv left_mv, left_second_mv; left_second_mv.as_int = 0; left_mv.as_int = col ? mic->bmi[i - 1].as_mv[0].as_int : left_block_mv(xd, mic, i); if (mbmi->second_ref_frame > 0) left_second_mv.as_int = col ? mic->bmi[i - 1].as_mv[1].as_int : left_block_second_mv(xd, mic, i); if (left_mv.as_int == this_mv->as_int && (mbmi->second_ref_frame <= 0 || left_second_mv.as_int == this_second_mv->as_int)) m = LEFT4X4; } cost = x->inter_bmode_costs[m]; } mic->bmi[i].as_mv[0].as_int = this_mv->as_int; if (mbmi->second_ref_frame > 0) mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; x->partition_info->bmi[i].mode = m; x->partition_info->bmi[i].mv.as_int = this_mv->as_int; if (mbmi->second_ref_frame > 0) x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; #if CONFIG_AB4X4 for (idy = 0; idy < bh; ++idy) { for (idx = 0; idx < bw; ++idx) { vpx_memcpy(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i])); vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx], &x->partition_info->bmi[i], sizeof(x->partition_info->bmi[i])); } } #endif } cost += thismvcost; return cost; } static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, MACROBLOCK *x, int const *labels,
1051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120
int which_label, int *labelyrate, int *distortion, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl) { int i, k; MACROBLOCKD *xd = &x->e_mbd; BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; int bwl = b_width_log2(bsize), bw = 1 << bwl; int bhl = b_height_log2(bsize), bh = 1 << bhl; int idx, idy; #if !CONFIG_AB4X4 bw = 1, bh = 1; #endif *labelyrate = 0; *distortion = 0; for (i = 0; i < 4; i++) { if (labels[i] == which_label) { const int src_stride = x->plane[0].src.stride; uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, x->plane[0].src.buf, src_stride); int16_t* src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, x->plane[0].src_diff); int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, xd->plane[0].dst.buf, xd->plane[0].dst.stride); int thisdistortion = 0; int thisrate = 0; vp9_build_inter_predictor(pre, xd->plane[0].pre[0].stride, dst, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[0], &xd->scale_factor[0], 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix); // TODO(debargha): Make this work properly with the // implicit-compoundinter-weight experiment when implicit // weighting for splitmv modes is turned on. if (xd->mode_info_context->mbmi.second_ref_frame > 0) { uint8_t* const second_pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, xd->plane[0].pre[1].buf, xd->plane[0].pre[1].stride); vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, dst, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[1], &xd->scale_factor[1], 4 * bw, 4 * bh, 1, &xd->subpix); } vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, src, src_stride, dst, xd->plane[0].dst.stride); k = i; for (idy = 0; idy < bh; ++idy) { for (idx = 0; idx < bw; ++idx) { k += (idy * 2 + idx); src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
1121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190
x->plane[0].src_diff); coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); x->fwd_txm4x4(src_diff, coeff, 16); x->quantize_b_4x4(x, k, DCT_DCT, 16); thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, k, 16), 16); thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, ta + (k & 1), tl + (k >> 1), TX_4X4, 16); } } *distortion += thisdistortion; *labelyrate += thisrate; } } *distortion >>= 2; return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); } typedef struct { int_mv *ref_mv, *second_ref_mv; int_mv mvp; int64_t segment_rd; int r; int d; int segment_yrate; B_PREDICTION_MODE modes[4]; int_mv mvs[4], second_mvs[4]; int eobs[4]; int mvthresh; } BEST_SEG_INFO; static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { int r = 0; r |= (mv->as_mv.row >> 3) < x->mv_row_min; r |= (mv->as_mv.row >> 3) > x->mv_row_max; r |= (mv->as_mv.col >> 3) < x->mv_col_min; r |= (mv->as_mv.col >> 3) > x->mv_col_max; return r; } static enum BlockSize get_block_size(int bw, int bh) { if (bw == 4 && bh == 4) return BLOCK_4X4; if (bw == 4 && bh == 8) return BLOCK_4X8; if (bw == 8 && bh == 4) return BLOCK_8X4; if (bw == 8 && bh == 8) return BLOCK_8X8; if (bw == 8 && bh == 16) return BLOCK_8X16; if (bw == 16 && bh == 8) return BLOCK_16X8; if (bw == 16 && bh == 16) return BLOCK_16X16; if (bw == 32 && bh == 32) return BLOCK_32X32; if (bw == 32 && bh == 16) return BLOCK_32X16;
1191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260
if (bw == 16 && bh == 32) return BLOCK_16X32; if (bw == 64 && bh == 32) return BLOCK_64X32; if (bw == 32 && bh == 64) return BLOCK_32X64; if (bw == 64 && bh == 64) return BLOCK_64X64; assert(0); return -1; } static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { int i, j; static const int labels[4] = { 0, 1, 2, 3 }; int br = 0, bd = 0; B_PREDICTION_MODE this_mode; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; const int label_count = 4; int64_t this_segment_rd = 0, other_segment_rd; int label_mv_thresh; int rate = 0; int sbr = 0, sbd = 0; int segmentyrate = 0; int best_eobs[4] = { 0 }; BLOCK_SIZE_TYPE bsize = mbmi->sb_type; int bwl = b_width_log2(bsize), bw = 1 << bwl; int bhl = b_height_log2(bsize), bh = 1 << bhl; int idx, idy; vp9_variance_fn_ptr_t *v_fn_ptr; ENTROPY_CONTEXT t_above[4], t_left[4]; ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; #if !CONFIG_AB4X4 bh = 1, bw = 1; #endif vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); #if CONFIG_AB4X4 v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)]; #else v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4]; #endif // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on // segments. setting this to 1 would make mv thresh // roughly equal to what it is for macroblocks label_mv_thresh = 1 * bsi->mvthresh / label_count; // Segmentation method overheads #if !CONFIG_AB4X4 rate += vp9_cost_mv_ref(cpi, SPLITMV, mbmi->mb_mode_context[mbmi->ref_frame]); this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); br += rate; #endif other_segment_rd = this_segment_rd; for (idy = 0; idy < 2; idy += bh) {
1261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330
for (idx = 0; idx < 2; idx += bw) { // TODO(jingning,rbultje): rewrite the rate-distortion optimization // loop for 4x4/4x8/8x4 block coding #if CONFIG_AB4X4 int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; int bestlabelyrate = 0; i = idy * 2 + idx; // search for the best motion vector on this segment for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) { int64_t this_rd; int distortion; int labelyrate; ENTROPY_CONTEXT t_above_s[4], t_left_s[4]; vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); // motion search for newmv (single predictor case only) if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) { int sseshift, n; int step_param = 0; int further_steps; int thissme, bestsme = INT_MAX; const struct buf_2d orig_src = x->plane[0].src; const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0]; /* Is the best so far sufficiently good that we cant justify doing * and new motion search. */ if (best_label_rd < label_mv_thresh) break; if (cpi->compressor_speed) { // use previous block's result as next block's MV predictor. if (i > 0) { bsi->mvp.as_int = x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; if (i == 2) bsi->mvp.as_int = x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; step_param = 2; } } further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; { int sadpb = x->sadperbit4; int_mv mvp_full; mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; // find first label n = i; // adjust src pointer for this segment x->plane[0].src.buf = raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, x->plane[0].src.buf, x->plane[0].src.stride); assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); x->e_mbd.plane[0].pre[0].buf = raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, x->e_mbd.plane[0].pre[0].buf, x->e_mbd.plane[0].pre[0].stride); bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400
sadpb, further_steps, 0, v_fn_ptr, bsi->ref_mv, &mode_mv[NEW4X4]); sseshift = 0; // Should we do a full search (best quality only) if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { /* Check if mvp_full is within the range. */ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, &mvp_full, sadpb, 16, v_fn_ptr, x->nmvjointcost, x->mvcost, bsi->ref_mv, n); if (thissme < bestsme) { bestsme = thissme; mode_mv[NEW4X4].as_int = x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; } else { /* The full search result is actually worse so re-instate the * previous best vector */ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = mode_mv[NEW4X4].as_int; } } } if (bestsme < INT_MAX) { int distortion; unsigned int sse; cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4], bsi->ref_mv, x->errorperbit, v_fn_ptr, x->nmvjointcost, x->mvcost, &distortion, &sse); // safe motion search result for use in compound prediction seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; } // restore src pointers x->plane[0].src = orig_src; x->e_mbd.plane[0].pre[0] = orig_pre; } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) { /* NEW4X4 */ /* motion search not completed? Then skip newmv for this block with * comppred */ if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { continue; } } rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], &second_mode_mv[this_mode], seg_mvs[i], bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, x->mvcost, cpi); // Trap vectors that reach beyond the UMV borders if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { continue; } if (mbmi->second_ref_frame > 0 && mv_check_bounds(x, &second_mode_mv[this_mode])) continue;
1401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470
this_rd = encode_inter_mb_segment(&cpi->common, x, labels, i, &labelyrate, &distortion, t_above_s, t_left_s); this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); rate += labelyrate; if (this_rd < best_label_rd) { sbr = rate; sbd = distortion; bestlabelyrate = labelyrate; mode_selected = this_mode; best_label_rd = this_rd; for (j = 0; j < 4; j++) if (labels[j] == i) best_eobs[j] = x->e_mbd.plane[0].eobs[j]; vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); } } /*for each 4x4 mode*/ vpx_memcpy(t_above, t_above_b, sizeof(t_above)); vpx_memcpy(t_left, t_left_b, sizeof(t_left)); labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], &second_mode_mv[mode_selected], seg_mvs[i], bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, x->mvcost, cpi); #else int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; int bestlabelyrate = 0; i = idy * 2 + idx; // search for the best motion vector on this segment for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { int64_t this_rd; int distortion; int labelyrate; ENTROPY_CONTEXT t_above_s[2], t_left_s[2]; vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); // motion search for newmv (single predictor case only) if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) { int sseshift, n; int step_param = 0; int further_steps; int thissme, bestsme = INT_MAX; const struct buf_2d orig_src = x->plane[0].src; const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0]; /* Is the best so far sufficiently good that we cant justify doing * and new motion search. */ if (best_label_rd < label_mv_thresh) break; if (cpi->compressor_speed) { // use previous block's result as next block's MV predictor. if (i > 0) { bsi->mvp.as_int = x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; if (i == 2) bsi->mvp.as_int = x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; step_param = 2; }
1471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540
} further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; { int sadpb = x->sadperbit4; int_mv mvp_full; mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; // find first label n = i; // adjust src pointer for this segment x->plane[0].src.buf = raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, x->plane[0].src.buf, x->plane[0].src.stride); assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); x->e_mbd.plane[0].pre[0].buf = raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, x->e_mbd.plane[0].pre[0].buf, x->e_mbd.plane[0].pre[0].stride); bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 0, v_fn_ptr, bsi->ref_mv, &mode_mv[NEW4X4]); sseshift = 0; // Should we do a full search (best quality only) if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { /* Check if mvp_full is within the range. */ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, &mvp_full, sadpb, 16, v_fn_ptr, x->nmvjointcost, x->mvcost, bsi->ref_mv, n); if (thissme < bestsme) { bestsme = thissme; mode_mv[NEW4X4].as_int = x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; } else { /* The full search result is actually worse so re-instate the * previous best vector */ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = mode_mv[NEW4X4].as_int; } } } if (bestsme < INT_MAX) { int distortion; unsigned int sse; cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4], bsi->ref_mv, x->errorperbit, v_fn_ptr, x->nmvjointcost, x->mvcost, &distortion, &sse); // safe motion search result for use in compound prediction seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; } // restore src pointers x->plane[0].src = orig_src;