vp9_rdopt.c 207 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16 17
#include "vp9/common/vp9_pragmas.h"

18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38 39 40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
43
#include "vp9/common/vp9_common.h"
44

John Koleszar's avatar
John Koleszar committed
45 46
// Larger of two values. This is a function-like macro: the selected argument
// is evaluated twice, so do not pass expressions with side effects.
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

// NOTE(review): presumably a sentinel for an unset motion vector; it is not
// used in the visible part of the file - confirm at the use sites.
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
// 17-entry threshold table.
// NOTE(review): presumably indexed by encoder speed setting; it is not
// referenced in the visible part of the file - confirm against its users.
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73 74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85 86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88 89 90 91 92 93 94 95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99 100 101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103 104 105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

107 108
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110 111 112 113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115 116 117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119 120 121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123 124 125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127 128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148 149
};

150 151
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
152
                             TX_SIZE tx_size) {
153
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
154

155
  for (i = 0; i < BLOCK_TYPES; i++)
156 157 158
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
159 160 161 162 163 164 165
          vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
                               p[i][j][k][l],
                               vp9_coef_tree);
        }
}

#if CONFIG_CODE_NONZEROCOUNT
// Fills the non-zero-count (nzc) cost table of cpi->mb for one transform size.
// For every (nzc context, reference type, block type) the token costs are
// derived from the matching nzc probability table and tree, and then the cost
// of each possible count 0 .. 16 << (2 * tx_size) is stored as the token cost
// plus the cost of any extra bits for that token.
static void fill_nzc_costs(VP9_COMP *cpi, TX_SIZE tx_size) {
  int nzc_context, r, b, nzc, values;
  int cost[16];
  // Number of distinct counts: one per coefficient position plus zero.
  values = (16 << (2 * tx_size)) + 1;

  for (nzc_context = 0; nzc_context < MAX_NZC_CONTEXTS; ++nzc_context) {
    for (r = 0; r < REF_TYPES; ++r) {
      for (b = 0; b < BLOCK_TYPES; ++b) {
        unsigned int *nzc_costs;
        // Pick the probability table, tree and destination for this size.
        if (tx_size == TX_4X4) {
          vp9_cost_tokens(cost,
                          cpi->common.fc.nzc_probs_4x4[nzc_context][r][b],
                          vp9_nzc4x4_tree);
          nzc_costs = cpi->mb.nzc_costs_4x4[nzc_context][r][b];
        } else if (tx_size == TX_8X8) {
          vp9_cost_tokens(cost,
                          cpi->common.fc.nzc_probs_8x8[nzc_context][r][b],
                          vp9_nzc8x8_tree);
          nzc_costs = cpi->mb.nzc_costs_8x8[nzc_context][r][b];
        } else if (tx_size == TX_16X16) {
          vp9_cost_tokens(cost,
                          cpi->common.fc.nzc_probs_16x16[nzc_context][r][b],
                          vp9_nzc16x16_tree);
          nzc_costs = cpi->mb.nzc_costs_16x16[nzc_context][r][b];
        } else {
          vp9_cost_tokens(cost,
                          cpi->common.fc.nzc_probs_32x32[nzc_context][r][b],
                          vp9_nzc32x32_tree);
          nzc_costs = cpi->mb.nzc_costs_32x32[nzc_context][r][b];
        }

        for (nzc = 0; nzc < values; ++nzc) {
          int e, c, totalcost = 0;
          c = codenzc(nzc);
          totalcost = cost[c];
          // Tokens with extra bits: add the cost of each extra bit of the
          // offset from the token's base value, highest bit index first.
          if ((e = vp9_extranzcbits[c])) {
            int x = nzc - vp9_basenzcvalue[c];
            while (e--) {
              totalcost += vp9_cost_bit(
                  cpi->common.fc.nzc_pcat_probs[nzc_context]
                                               [c - NZC_TOKENS_NOEXTRA][e],
                  ((x >> e) & 1));
            }
          }
          nzc_costs[nzc] = totalcost;
        }
      }
    }
  }
}
#endif
John Koleszar's avatar
John Koleszar committed
217

218

219 220 221 222
// Per-ratio boost factors (in 1/16ths) applied to the RD multiplier; only the
// first five entries are non-zero, the remaining 27 are zero-initialized.
static int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
223

224
// 3* dc_qlookup[Q]*dc_qlookup[Q];
225

226
/* values are now correlated to quantizer */
// Lookup tables indexed by quantizer index; filled by vp9_init_me_luts() and
// read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

230
// Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
// index in [0, QINDEX_RANGE). Each entry is a linear function of
// vp9_convert_qindex_to_q(i), truncated to int.
void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
242

243
// Returns the base rate-distortion multiplier for a quantizer index:
// 11/4 times the square of vp9_dc_quant(qindex, 0), using integer arithmetic.
static int compute_rd_mult(int qindex) {
  int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
248 249 250
// Copies the SAD-per-bit values for `qindex` from the lookup tables into
// cpi->mb. `qindex` is used directly as an array index and is not range
// checked here, so it must lie in [0, QINDEX_RANGE).
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

253

Dmitry Kovalev's avatar
Dmitry Kovalev committed
254
// Sets up the rate-distortion constants and cost tables in `cpi` for the
// given quantizer index:
//   - clamps qindex to [0, MAXQ] and derives cpi->RDMULT / cpi->RDDIV,
//   - derives cpi->mb.errorperbit (never zero),
//   - refreshes the speed features and the per-mode RD thresholds,
//   - rebuilds the coefficient token costs (and nzc costs when enabled),
//   - rebuilds the mode costs and, for non-key frames, the MV cost table.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex);

  cpi->RDMULT = compute_rd_mult(qindex);
  // Second pass, non-key frames: boost RDMULT by rd_iifactor[ratio] / 16,
  // using the last table entry for any ratio above 31.
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Keep errorperbit at least 1.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  // Threshold scale: ((dc_quant / 4) ^ 1.25) * 4, with a floor of 8.
  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    // Large multiplier: fold the factor of 100 into RDMULT and the thresholds
    // instead of RDDIV.
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: thresh_mult * q can exceed INT_MAX before the
        // division by 100, which would be signed overflow in int arithmetic.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = (scaled > INT_MAX) ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // Saturate instead of overflowing when thresh_mult * q would not fit.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);
#if CONFIG_CODE_NONZEROCOUNT
  fill_nzc_costs(cpi, TX_4X4);
  fill_nzc_costs(cpi, TX_8X8);
  fill_nzc_costs(cpi, TX_16X16);
  fill_nzc_costs(cpi, TX_32X32);
#endif

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    // Build the MV cost table into the high-precision or regular cost arrays
    // depending on allow_high_precision_mv.
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

339
// Returns the sum of squared differences between the first `block_size`
// entries of `coeff` and `dqcoeff`. Returns 0 when block_size <= 0.
// The sum is accumulated in an int, so callers must keep the inputs small
// enough that the total fits.
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int i, error = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error;
}

350
// Returns the total block error over the 16 blocks mb->block[0..15]: for each
// block, vp9_block_error() compares its 16 coefficients against the matching
// 16-entry slice of the plane-0 dequantized coefficients.
int vp9_mbblock_error_c(MACROBLOCK *mb) {
  MACROBLOCKD * const xd = &mb->e_mbd;
  BLOCK  *be;
  int i;
  int error = 0;

  for (i = 0; i < 16; i++) {
    be = &mb->block[i];
    error += vp9_block_error(be->coeff,
                             BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
  }
  return error;
}

364
// Returns the total block error over blocks 16..23 of the macroblock.
// Blocks 16..19 are compared against plane 1 and blocks 20..23 against
// plane 2; the plane-relative block index is the block number minus the
// first block of that plane.
int vp9_mbuverror_c(MACROBLOCK *mb) {
  MACROBLOCKD * const xd = &mb->e_mbd;
  BLOCK  *be;

  int i, error = 0;

  for (i = 16; i < 20; i++) {
    be = &mb->block[i];
    error += vp9_block_error(be->coeff,
                             BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16),
                             16);
  }
  for (i = 20; i < 24; i++) {
    be = &mb->block[i];
    error += vp9_block_error(be->coeff,
                             BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16),
                             16);
  }

  return error;
}

386
// Returns the combined sum of squared errors of the two 8x8 chroma blocks
// (source blocks 16 and 20) against the prediction reference buffers
// x->e_mbd.pre.u_buffer / v_buffer, displaced by a chroma motion vector
// derived from mbmi.mv[0].
int vp9_uvsse(MACROBLOCK *x) {
  uint8_t *uptr, *vptr;
  uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
  uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
  int uv_stride = x->block[16].src_stride;

  unsigned int sse1 = 0;
  unsigned int sse2 = 0;
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
  int offset;
  int pre_stride = x->e_mbd.block[16].pre_stride;

  // Halve the vector, rounding away from zero: nudge the magnitude by one
  // before the truncating division.
  if (mv_row < 0)
    mv_row -= 1;
  else
    mv_row += 1;

  if (mv_col < 0)
    mv_col -= 1;
  else
    mv_col += 1;

  mv_row /= 2;
  mv_col /= 2;

  // The upper bits select the whole-pixel offset; the low 3 bits are the
  // sub-pixel fraction handled below.
  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
  uptr = x->e_mbd.pre.u_buffer + offset;
  vptr = x->e_mbd.pre.v_buffer + offset;

  if ((mv_row | mv_col) & 7) {
    // Fractional displacement: use the sub-pixel variance functions.
    vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
                              (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
    vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
                              (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
    sse2 += sse1;
  } else {
    vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
    vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
    sse2 += sse1;
  }
  return sse2;
}

430
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
431
                              int ib, PLANE_TYPE type,
432 433
                              ENTROPY_CONTEXT *a,
                              ENTROPY_CONTEXT *l,
434 435
                              TX_SIZE tx_size,
                              int y_blocks) {
436
  MACROBLOCKD *const xd = &mb->e_mbd;
437 438
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
439
  int c = 0;
440 441
  int cost = 0, pad;
  const int *scan, *nb;
442 443
  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
  const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
444 445
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
                                           pb_idx.block, 16);
446
  const int ref = mbmi->ref_frame != INTRA_FRAME;
447
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
448
      mb->token_costs[tx_size][type][ref];
449
  ENTROPY_CONTEXT a_ec, l_ec;
450 451 452 453
  ENTROPY_CONTEXT *const a1 = a +
      sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
  ENTROPY_CONTEXT *const l1 = l +
      sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
454

455
#if CONFIG_CODE_NONZEROCOUNT
456
  const int nzc_used = get_nzc_used(tx_size);
457 458
  int nzc_context = vp9_get_nzc_context(cm, xd, ib);
  unsigned int *nzc_cost;
459
#endif
460
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
461 462 463 464
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
465 466

  // Check for consistency of tx_size with mode info
467
  assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
468 469 470 471 472 473 474
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

475
  switch (tx_size) {
476 477 478
    case TX_4X4: {
      const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                              get_tx_type_4x4(xd, ib) : DCT_DCT;
479 480
      a_ec = *a;
      l_ec = *l;
481 482 483
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type];
#endif
484
      coef_probs = cm->fc.coef_probs_4x4;
485
      seg_eob = 16;
486 487 488 489 490 491
      if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_4x4;
      } else if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_4x4;
      } else {
        scan = vp9_default_zig_zag1d_4x4;
492 493
      }
      break;
494
    }
495 496 497 498 499
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
      const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
      const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                              get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
500 501
      a_ec = (a[0] + a[1]) != 0;
      l_ec = (l[0] + l[1]) != 0;
502 503 504 505 506 507 508
      if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_8x8;
      } else if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_8x8;
      } else {
        scan = vp9_default_zig_zag1d_8x8;
      }
509 510 511
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
#endif
512
      coef_probs = cm->fc.coef_probs_8x8;
513
      seg_eob = 64;
514
      break;
515 516 517 518 519 520 521 522 523 524 525 526 527
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
      const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
      const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                              get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
      if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_16x16;
      } else if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_16x16;
      } else {
        scan = vp9_default_zig_zag1d_16x16;
      }
528 529 530
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
#endif
531
      coef_probs = cm->fc.coef_probs_16x16;
532
      seg_eob = 256;
533
      if (type == PLANE_TYPE_UV) {
534 535 536 537 538
        a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
        l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
      } else {
        a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
        l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
539
      }
540
      break;
541
    }
542 543
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
544 545 546
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type];
#endif
547
      coef_probs = cm->fc.coef_probs_32x32;
548
      seg_eob = 1024;
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564
      if (type == PLANE_TYPE_UV) {
        ENTROPY_CONTEXT *a2, *a3, *l2, *l3;
        a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        a_ec = (a[0] + a[1] + a1[0] + a1[1] +
                a2[0] + a2[1] + a3[0] + a3[1]) != 0;
        l_ec = (l[0] + l[1] + l1[0] + l1[1] +
                l2[0] + l2[1] + l3[0] + l3[1]) != 0;
      } else {
        a_ec = (a[0] + a[1] + a[2] + a[3] +
                a1[0] + a1[1] + a1[2] + a1[3]) != 0;
        l_ec = (l[0] + l[1] + l[2] + l[3] +
                l1[0] + l1[1] + l1[2] + l1[3]) != 0;
      }
565
      break;
566
    default:
567
      abort();
568 569
      break;
  }
570
  assert(eob <= seg_eob);
571

572
  VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
573 574
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
575

576 577
#if CONFIG_CODE_NONZEROCOUNT
  if (!nzc_used)
578
#endif
579 580
    if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
      seg_eob = 0;
581

582 583 584 585
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

586
  {
587 588 589
#if CONFIG_CODE_NONZEROCOUNT
    int nzc = 0;
#endif
590 591
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
592
      int t = vp9_dct_value_tokens_ptr[v].Token;
593 594 595
#if CONFIG_CODE_NONZEROCOUNT
      nzc += (v != 0);
#endif
596
      token_cache[c] = t;
597
      cost += token_costs[get_coef_band(scan, tx_size, c)][pt][t];
598
      cost += vp9_dct_value_cost_ptr[v];
599 600 601
#if !CONFIG_CODE_NONZEROCOUNT
      if (!c || token_cache[c - 1])
        cost += vp9_cost_bit(coef_probs[type][ref]
602
                                       [get_coef_band(scan, tx_size, c)]
603 604
                                       [pt][0], 1);
#endif
605
      pt = vp9_get_coef_context(scan, nb, pad, token_cache, c + 1, default_eob);
606
    }
607
#if CONFIG_CODE_NONZEROCOUNT
608 609 610
    if (nzc_used)
      cost += nzc_cost[nzc];
    else
611
#endif
612 613 614 615
      if (c < seg_eob)
        cost += mb->token_costs[tx_size][type][ref]
                               [get_coef_band(scan, tx_size, c)]
                               [pt][DCT_EOB_TOKEN];
616 617
  }

618
  // is eob first coefficient;
619
  pt = (c > 0);
620
  *a = *l = pt;
621 622 623 624 625 626 627 628 629 630 631 632 633 634
  if (tx_size >= TX_8X8) {
    a[1] = l[1] = pt;
    if (tx_size >= TX_16X16) {
      if (type == PLANE_TYPE_UV) {
        a1[0] = a1[1] = l1[0] = l1[1] = pt;
      } else {
        a[2] = a[3] = l[2] = l[3] = pt;
        if (tx_size >= TX_32X32) {
          a1[0] = a1[1] = a1[2] = a1[3] = pt;
          l1[0] = l1[1] = l1[2] = l1[3] = pt;
        }
      }
    }
  }
635 636 637
  return cost;
}

638
// Returns the total coefficient cost of the 16 4x4 luma blocks of `mb`.
// The above/left entropy contexts are copied into locals first, so the
// context updates made by cost_coeffs() do not touch xd's real contexts.
static int rdcost_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *mb) {
  int cost = 0;
  int b;
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;

  vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
  vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));

  for (b = 0; b < 16; b++)
    cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
                        ta + vp9_block2above[TX_4X4][b],
                        tl + vp9_block2left[TX_4X4][b],
                        TX_4X4, 16);

  return cost;
}

658 659
// Evaluates the luma plane of `mb` with the 4x4 transform: sets the mode
// info's txfm_size to TX_4X4, transforms and quantizes, then reports
//   *distortion  vp9_mbblock_error(mb) >> 2
//   *rate        coefficient cost from rdcost_mby_4x4()
//   *skippable   result of vp9_mby_is_skippable_4x4()
static void macro_block_yrd_4x4(VP9_COMMON *const cm,
                                MACROBLOCK *mb,
                                int *rate,
                                int *distortion,
                                int *skippable) {
  MACROBLOCKD *const xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);

  *distortion = vp9_mbblock_error(mb) >> 2;
  *rate = rdcost_mby_4x4(cm, mb);
  *skippable = vp9_mby_is_skippable_4x4(xd);
}
John Koleszar's avatar
John Koleszar committed
673

674
// Returns the total coefficient cost of the four 8x8 luma blocks of `mb`
// (block indices 0, 4, 8, 12). The above/left entropy contexts are copied
// into locals first, so the context updates made by cost_coeffs() do not
// touch xd's real contexts.
static int rdcost_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *mb) {
  int cost = 0;
  int b;
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;

  vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
  vpx_memcpy(&t_left,  xd->left_context, sizeof(t_left));

  for (b = 0; b < 16; b += 4)
    cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b],
                        TX_8X8, 16);

  return cost;
}

694 695
// Evaluates the luma plane of `mb` with the 8x8 transform: sets the mode
// info's txfm_size to TX_8X8, transforms and quantizes, then reports
//   *distortion  vp9_mbblock_error(mb) >> 2
//   *rate        coefficient cost from rdcost_mby_8x8()
//   *skippable   result of vp9_mby_is_skippable_8x8()
static void macro_block_yrd_8x8(VP9_COMMON *const cm,
                                MACROBLOCK *mb,
                                int *rate,
                                int *distortion,
                                int *skippable) {
  MACROBLOCKD *const xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);

  *distortion = vp9_mbblock_error(mb) >> 2;
  *rate = rdcost_mby_8x8(cm, mb);
  *skippable = vp9_mby_is_skippable_8x8(xd);
}
709

710 711
// Returns the coefficient cost of the single 16x16 luma block of `mb`.
// The above/left entropy contexts are copied into locals first, so the
// context updates made by cost_coeffs() do not touch xd's real contexts.
static int rdcost_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *mb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;

  vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
  vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));

  return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16, 16);
}
721

722
// Evaluates the luma plane of `mb` with the 16x16 transform: sets the mode
// info's txfm_size to TX_16X16, transforms and quantizes, optionally runs the
// coefficient optimizer, then reports
//   *distortion  vp9_mbblock_error(mb) >> 2
//   *rate        coefficient cost from rdcost_mby_16x16()
//   *skippable   result of vp9_mby_is_skippable_16x16()
static void macro_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *mb,
                                  int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_mby_16x16(mb);
  vp9_quantize_mby_16x16(mb);
  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
  // The optimizer only runs when enabled and for modes ordered before
  // I8X8_PRED.
  if (mb->optimize &&
      xd->mode_info_context->mbmi.mode < I8X8_PRED)
    vp9_optimize_mby_16x16(cm, mb);

  *distortion = vp9_mbblock_error(mb) >> 2;
  *rate = rdcost_mby_16x16(cm, mb);
  *skippable = vp9_mby_is_skippable_16x16(xd);
}

741
/* Chooses the transform size for the current block from per-size
 * measurements and records the per-transform-mode RD costs.
 *
 *   r[n][0]        - in: rate measured for transform size n.
 *   r[n][1]        - out: r[n][0] plus cost terms derived from cm->prob_tx
 *                    (presumably the cost of signalling size n - confirm).
 *   d[n], s[n]     - in: distortion and skip flag for transform size n.
 *   rate, distortion, skip
 *                  - out: the entries belonging to the chosen size; the rate
 *                    comes from column 1 only in TX_MODE_SELECT.
 *   txfm_cache     - out: RD cost per transform mode.
 *   max_txfm_size  - largest size for which r/d/s are populated. Entries of
 *                    the local rd table above it are never written, and
 *                    rd[TX_32X32] is only read when this equals TX_32X32.
 *
 * Side effect: writes the chosen size to mbmi->txfm_size.
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
  int64_t rd[TX_SIZE_MAX_SB][2];
  int cost_skip = 0;     // cost of the skip flag being set
  int cost_noskip = 0;   // cost of the skip flag being clear
  int best_is_32x32, best_is_16x16, tx_select;
  int tx, i;

  // Column 1 of the rate table: column 0 plus the prob_tx-derived terms.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    const int last = (tx == max_txfm_size) ? tx - 1 : tx;

    r[tx][1] = r[tx][0];
    for (i = 0; i <= last; i++) {
      r[tx][1] += (i == tx) ? vp9_cost_zero(cm->prob_tx[i])
                            : vp9_cost_one(cm->prob_tx[i]);
    }
  }

  // Without skip signalling both skip costs stay at zero and s[] is ignored.
  if (cm->mb_no_coeff_skip) {
    assert(skip_prob > 0);
    cost_noskip = vp9_cost_bit(skip_prob, 0);
    cost_skip = vp9_cost_bit(skip_prob, 1);
  }

  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    if (cm->mb_no_coeff_skip && s[tx]) {
      // A skipped block only pays for the skip flag.
      rd[tx][0] = rd[tx][1] = RDCOST(x->rdmult, x->rddiv, cost_skip, d[tx]);
    } else {
      rd[tx][0] = RDCOST(x->rdmult, x->rddiv, r[tx][0] + cost_noskip, d[tx]);
      rd[tx][1] = RDCOST(x->rdmult, x->rddiv, r[tx][1] + cost_noskip, d[tx]);
    }
  }

  // Strict "better than every smaller size" tests on the column-1 costs.
  best_is_32x32 = max_txfm_size == TX_32X32 &&
                  rd[TX_32X32][1] < rd[TX_16X16][1] &&
                  rd[TX_32X32][1] < rd[TX_8X8][1] &&
                  rd[TX_32X32][1] < rd[TX_4X4][1];
  best_is_16x16 = rd[TX_16X16][1] < rd[TX_8X8][1] &&
                  rd[TX_16X16][1] < rd[TX_4X4][1];
  tx_select = cm->txfm_mode == TX_MODE_SELECT;

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 || (tx_select && best_is_32x32))) {
    mbmi->txfm_size = TX_32X32;
  } else if (cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (tx_select && best_is_16x16)) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             (tx_select && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][tx_select];
  *skip       = s[mbmi->txfm_size];

  // Fixed modes use the column-0 cost of their size.
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];

  // TX_MODE_SELECT uses the column-1 cost of the winning size.
  if (best_is_32x32) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (best_is_16x16) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else if (rd[TX_4X4][1] < rd[TX_8X8][1]) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
  } else {
    txfm_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
  }
}

/* Luma rate/distortion search over transform sizes for one macroblock.
 *
 * Computes the luma residual, measures the 16x16, 8x8 and 4x4 transforms in
 * that order, then lets choose_txfm_size_from_rd() pick a size (capped at
 * TX_16X16) and fill rate, distortion, skippable and txfm_cache.
 */
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  int rates[TX_SIZE_MAX_MB][2];
  int dists[TX_SIZE_MAX_MB];
  int skips[TX_SIZE_MAX_MB];

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);

  // Only column 0 of the rate table is filled here.
  macro_block_yrd_16x16(cm, x, &rates[TX_16X16][0], &dists[TX_16X16],
                        &skips[TX_16X16]);
  macro_block_yrd_8x8(cm, x, &rates[TX_8X8][0], &dists[TX_8X8],
                      &skips[TX_8X8]);
  macro_block_yrd_4x4(cm, x, &rates[TX_4X4][0], &dists[TX_4X4],
                      &skips[TX_4X4]);

  choose_txfm_size_from_rd(cpi, x, rates, rate, dists, distortion, skips,
                           skippable, txfm_cache, TX_16X16);
}

844
/* Copies a 4x4 block of bytes from predictor to dst.
 *
 * Both buffers are addressed with a row pitch of 16 bytes, so the bytes
 * touched are 0-3, 16-19, 32-35 and 48-51; everything else in dst is left
 * alone. This is the region the previous word-based copy covered on targets
 * with a 4-byte unsigned int.
 *
 * The copy is done byte by byte on purpose: reading and writing uint8_t
 * storage through unsigned int pointers violates the C aliasing rules and
 * requires word alignment that the arguments do not guarantee.
 */
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
  int row, col;

  for (row = 0; row < 4; row++) {
    const int offset = row * 16;  // 16-byte pitch between rows

    for (col = 0; col < 4; col++)
      dst[offset + col] = predictor[offset + col];
  }
}
852

853
/* Sum of squared differences between two coefficient arrays.
 *
 *   coeff, dqcoeff - arrays of at least block_size entries; read only.
 *   block_size     - number of entries to compare; <= 0 yields 0.
 *   shift          - right shift applied to the 64-bit sum before returning.
 *
 * Returns the shifted sum, clamped to INT_MAX.
 *
 * The difference of two int16_t values spans [-65535, 65535], so its square
 * does not fit in a signed 32-bit int; the arithmetic is done in int64_t
 * rather than relying on unsigned wraparound.
 */
static int vp9_sb_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                                int block_size, int shift) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886
/* Sum of squared differences for a coefficient array whose two halves are
 * dequantized into separate buffers.
 *
 *   coeff      - block_size entries; the first block_size / 2 are compared
 *                with dqcoeff0, the next block_size / 2 with dqcoeff1.
 *   dqcoeff0/1 - at least block_size / 2 entries each; read only.
 *   block_size - total number of entries in coeff (integer-halved, so an odd
 *                trailing entry is ignored).
 *   shift      - right shift applied to the 64-bit sum before returning.
 *
 * Returns the shifted sum, clamped to INT_MAX. Arithmetic is done in int64_t
 * because the square of an int16_t difference can exceed a signed 32-bit int.
 */
static int vp9_sb_uv_block_error_c(const int16_t *coeff,
                                   const int16_t *dqcoeff0,
                                   const int16_t *dqcoeff1,
                                   int block_size, int shift) {
  const int half = block_size / 2;
  int64_t error = 0;
  int i;

  for (i = 0; i < half; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff0[i];
    error += diff * diff;
  }
  for (i = 0; i < half; i++) {
    const int64_t diff = (int64_t)coeff[half + i] - dqcoeff1[i];
    error += diff * diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

887 888 889 890 891 892 893 894 895 896 897 898 899
/* Adds up cost_coeffs() for luma blocks 0..63 at TX_4X4, one block per step.
 *
 * Two ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; the context pointers passed to cost_coeffs() are offsets into those
 * copies taken from vp9_block2above_sb / vp9_block2left_sb.
 */
static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[2];
  ENTROPY_CONTEXT_PLANES left_copy[2];
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)left_copy;
  int total = 0;
  int blk;

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  for (blk = 0; blk < 64; blk += 1) {
    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + vp9_block2above_sb[TX_4X4][blk],
                         left + vp9_block2left_sb[TX_4X4][blk],
                         TX_4X4, 64);
  }

  return total;
}

/* Measures the superblock luma plane coded with the 4x4 transform.
 *
 *   rate        - receives rdcost_sby_4x4().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 1024 entries, shifted by 2.
 *   skippable   - receives vp9_sby_is_skippable_4x4().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_4X4.
 */
static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 1024;
  const int error_shift = 2;

  mbmi->txfm_size = TX_4X4;

  vp9_transform_sby_4x4(x);
  vp9_quantize_sby_4x4(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sby_4x4(cm, x);
  *skippable = vp9_sby_is_skippable_4x4(xd);
}

/* Adds up cost_coeffs() for luma blocks 0, 4, 8, ... 60 at TX_8X8.
 *
 * Two ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; the context pointers passed to cost_coeffs() are offsets into those
 * copies taken from vp9_block2above_sb / vp9_block2left_sb.
 */
static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[2];
  ENTROPY_CONTEXT_PLANES left_copy[2];
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)left_copy;
  int total = 0;
  int blk;

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  // Block index advances by 4 per 8x8 transform.
  for (blk = 0; blk < 64; blk += 4) {
    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + vp9_block2above_sb[TX_8X8][blk],
                         left + vp9_block2left_sb[TX_8X8][blk],
                         TX_8X8, 64);
  }

  return total;
}

/* Measures the superblock luma plane coded with the 8x8 transform.
 *
 *   rate        - receives rdcost_sby_8x8().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 1024 entries, shifted by 2.
 *   skippable   - receives vp9_sby_is_skippable_8x8().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_8X8.
 */
static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 1024;
  const int error_shift = 2;

  mbmi->txfm_size = TX_8X8;

  vp9_transform_sby_8x8(x);
  vp9_quantize_sby_8x8(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sby_8x8(cm, x);
  *skippable = vp9_sby_is_skippable_8x8(xd);
}

/* Adds up cost_coeffs() for luma blocks 0, 16, 32 and 48 at TX_16X16.
 *
 * Two ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; the context pointers passed to cost_coeffs() are offsets into those
 * copies taken from vp9_block2above_sb / vp9_block2left_sb.
 */
static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[2];
  ENTROPY_CONTEXT_PLANES left_copy[2];
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)left_copy;
  int total = 0;
  int blk;

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  // Block index advances by 16 per 16x16 transform.
  for (blk = 0; blk < 64; blk += 16) {
    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + vp9_block2above_sb[TX_16X16][blk],
                         left + vp9_block2left_sb[TX_16X16][blk],
                         TX_16X16, 64);
  }

  return total;
}

/* Measures the superblock luma plane coded with the 16x16 transform.
 *
 *   rate        - receives rdcost_sby_16x16().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 1024 entries, shifted by 2.
 *   skippable   - receives vp9_sby_is_skippable_16x16().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_16X16.
 */
static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 1024;
  const int error_shift = 2;

  mbmi->txfm_size = TX_16X16;

  vp9_transform_sby_16x16(x);
  vp9_quantize_sby_16x16(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sby_16x16(cm, x);
  *skippable = vp9_sby_is_skippable_16x16(xd);
}

/* Returns cost_coeffs() for luma block 0 at TX_32X32.
 *
 * Two ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; cost_coeffs() is given pointers to the start of those copies.
 */
static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[2];
  ENTROPY_CONTEXT_PLANES left_copy[2];

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC,
                     (ENTROPY_CONTEXT *)above_copy,
                     (ENTROPY_CONTEXT *)left_copy,
                     TX_32X32, 64);
}

992
/* Measures the superblock luma plane coded with the 32x32 transform.
 *
 *   rate        - receives rdcost_sby_32x32().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 1024 entries, with no shift.
 *   skippable   - receives vp9_sby_is_skippable_32x32().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_32X32.
 */
static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 1024;
  const int error_shift = 0;

  mbmi->txfm_size = TX_32X32;

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sby_32x32(cm, x);
  *skippable = vp9_sby_is_skippable_32x32(xd);
}

1005 1006
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
1007
                            int *skip,
1008
                            int64_t txfm_cache[NB_TXFM_MODES]) {
1009
  VP9_COMMON *const cm = &cpi->common;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1010
  MACROBLOCKD *const xd = &x->e_mbd;
1011
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1012 1013 1014
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;

1015 1016 1017 1018 1019
  vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
  super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
  super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
  super_block_yrd_8x8(cm, x,   &r[TX_8X8][0],   &d[TX_8X8],   &s[TX_8X8]);
  super_block_yrd_4x4(cm, x,   &r[TX_4X4][0],   &d[TX_4X4],   &s[TX_4X4]);
1020 1021 1022

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037
}

/* Adds up cost_coeffs() for luma blocks 0..255 at TX_4X4, one block per step.
 *
 * Four ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; the context pointers passed to cost_coeffs() are offsets into those
 * copies taken from vp9_block2above_sb64 / vp9_block2left_sb64.
 */
static int rdcost_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[4];
  ENTROPY_CONTEXT_PLANES left_copy[4];
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)left_copy;
  int total = 0;
  int blk;

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  for (blk = 0; blk < 256; blk += 1) {
    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + vp9_block2above_sb64[TX_4X4][blk],
                         left + vp9_block2left_sb64[TX_4X4][blk],
                         TX_4X4, 256);
  }

  return total;
}

/* Measures the 64x64 superblock luma plane coded with the 4x4 transform.
 *
 *   rate        - receives rdcost_sb64y_4x4().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 4096 entries, shifted by 2.
 *   skippable   - receives vp9_sb64y_is_skippable_4x4().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_4X4.
 */
static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 4096;
  const int error_shift = 2;

  mbmi->txfm_size = TX_4X4;

  vp9_transform_sb64y_4x4(x);
  vp9_quantize_sb64y_4x4(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sb64y_4x4(cm, x);
  *skippable = vp9_sb64y_is_skippable_4x4(xd);
}

/* Adds up cost_coeffs() for luma blocks 0, 4, 8, ... 252 at TX_8X8.
 *
 * Four ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; the context pointers passed to cost_coeffs() are offsets into those
 * copies taken from vp9_block2above_sb64 / vp9_block2left_sb64.
 */
static int rdcost_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[4];
  ENTROPY_CONTEXT_PLANES left_copy[4];
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)left_copy;
  int total = 0;
  int blk;

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  // Block index advances by 4 per 8x8 transform.
  for (blk = 0; blk < 256; blk += 4) {
    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + vp9_block2above_sb64[TX_8X8][blk],
                         left + vp9_block2left_sb64[TX_8X8][blk],
                         TX_8X8, 256);
  }

  return total;
}

/* Measures the 64x64 superblock luma plane coded with the 8x8 transform.
 *
 *   rate        - receives rdcost_sb64y_8x8().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 4096 entries, shifted by 2.
 *   skippable   - receives vp9_sb64y_is_skippable_8x8().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_8X8.
 */
static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 4096;
  const int error_shift = 2;

  mbmi->txfm_size = TX_8X8;

  vp9_transform_sb64y_8x8(x);
  vp9_quantize_sb64y_8x8(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sb64y_8x8(cm, x);
  *skippable = vp9_sb64y_is_skippable_8x8(xd);
}

/* Adds up cost_coeffs() for luma blocks 0, 16, 32, ... 240 at TX_16X16.
 *
 * Four ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; the context pointers passed to cost_coeffs() are offsets into those
 * copies taken from vp9_block2above_sb64 / vp9_block2left_sb64.
 */
static int rdcost_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[4];
  ENTROPY_CONTEXT_PLANES left_copy[4];
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)left_copy;
  int total = 0;
  int blk;

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  // Block index advances by 16 per 16x16 transform.
  for (blk = 0; blk < 256; blk += 16) {
    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + vp9_block2above_sb64[TX_16X16][blk],
                         left + vp9_block2left_sb64[TX_16X16][blk],
                         TX_16X16, 256);
  }

  return total;
}

/* Measures the 64x64 superblock luma plane coded with the 16x16 transform.
 *
 *   rate        - receives rdcost_sb64y_16x16().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 4096 entries, shifted by 2.
 *   skippable   - receives vp9_sb64y_is_skippable_16x16().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_16X16.
 */
static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                    int *rate, int *distortion,
                                    int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 4096;
  const int error_shift = 2;

  mbmi->txfm_size = TX_16X16;

  vp9_transform_sb64y_16x16(x);
  vp9_quantize_sb64y_16x16(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sb64y_16x16(cm, x);
  *skippable = vp9_sb64y_is_skippable_16x16(xd);
}

/* Adds up cost_coeffs() for luma blocks 0, 64, 128 and 192 at TX_32X32.
 *
 * Four ENTROPY_CONTEXT_PLANES worth of above and left context are copied into
 * locals; the context pointers passed to cost_coeffs() are offsets into those
 * copies taken from vp9_block2above_sb64 / vp9_block2left_sb64.
 */
static int rdcost_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy[4];
  ENTROPY_CONTEXT_PLANES left_copy[4];
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)left_copy;
  int total = 0;
  int blk;

  vpx_memcpy(above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(left_copy, xd->left_context, sizeof(left_copy));

  // Block index advances by 64 per 32x32 transform.
  for (blk = 0; blk < 256; blk += 64) {
    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + vp9_block2above_sb64[TX_32X32][blk],
                         left + vp9_block2left_sb64[TX_32X32][blk],
                         TX_32X32, 256);
  }

  return total;
}

/* Measures the 64x64 superblock luma plane coded with the 32x32 transform.
 *
 *   rate        - receives rdcost_sb64y_32x32().
 *   distortion  - receives the squared error between x->coeff and the plane-0
 *                 dequantized coefficients over 4096 entries, with no shift.
 *   skippable   - receives vp9_sb64y_is_skippable_32x32().
 *
 * Side effect: mbmi.txfm_size of the current mode info is set to TX_32X32.
 */
static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                    int *rate, int *distortion,
                                    int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int num_coeffs = 4096;
  const int error_shift = 0;

  mbmi->txfm_size = TX_32X32;

  vp9_transform_sb64y_32x32(x);
  vp9_quantize_sb64y_32x32(x);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, error_shift);
  *rate = rdcost_sb64y_32x32(cm, x);
  *skippable = vp9_sb64y_is_skippable_32x32(xd);
}
1150 1151 1152 1153 1154

/* Luma rate/distortion search over transform sizes for one 64x64 superblock.
 *
 * Computes the luma residual from the source and destination Y buffers,
 * measures the 32x32, 16x16, 8x8 and 4x4 transforms in that order, then lets
 * choose_txfm_size_from_rd() pick a size (up to TX_SIZE_MAX_SB - 1) and fill
 * rate, distortion, skip and txfm_cache.
 */
static void super_block_64_yrd(VP9_COMP *cpi,
                               MACROBLOCK *x, int *rate, int *distortion,
                               int *skip,
                               int64_t txfm_cache[NB_TXFM_MODES]) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  int rates[TX_SIZE_MAX_SB][2];
  int dists[TX_SIZE_MAX_SB];
  int skips[TX_SIZE_MAX_SB];
  const uint8_t *src = x->src.y_buffer;
  const uint8_t *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride;
  int dst_y_stride = xd->dst.y_stride;

  vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);

  // Only column 0 of the rate table is filled here.
  super_block64_yrd_32x32(cm, x, &rates[TX_32X32][0], &dists[TX_32X32],
                          &skips[TX_32X32]);
  super_block64_yrd_16x16(cm, x, &rates[TX_16X16][0], &dists[TX_16X16],
                          &skips[TX_16X16]);
  super_block64_yrd_8x8(cm, x, &rates[TX_8X8][0], &dists[TX_8X8],
                        &skips[TX_8X8]);
  super_block64_yrd_4x4(cm, x, &rates[TX_4X4][0], &dists[TX_4X4],
                        &skips[TX_4X4]);

  choose_txfm_size_from_rd(cpi, x, rates, rate, dists, distortion, skips, skip,
                           txfm_cache, TX_SIZE_MAX_SB - 1);
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1170

1171
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
John Koleszar's avatar
John Koleszar committed
1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189
  const unsigned int *p = (const unsigned int *)predictor;
  unsigned int *d = (unsigned int *)dst;
  d[0] = p[0];
  d[1] = p[1];
  d[4] = p[4];
  d[5] = p[5];
  d[8] = p[8];
  d[9] = p[9];
  d[12] = p[12];
  d[13] = p[13];
  d[16] = p[16<