rdopt.c 165 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11 12 13 14 15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16
#include "vp9/common/pragmas.h"
John Koleszar's avatar
John Koleszar committed
17 18 19 20 21 22

#include "tokenize.h"
#include "treewriter.h"
#include "onyx_int.h"
#include "modecosts.h"
#include "encodeintra.h"
23 24 25 26 27 28
#include "vp9/common/entropymode.h"
#include "vp9/common/reconinter.h"
#include "vp9/common/reconintra.h"
#include "vp9/common/reconintra4x4.h"
#include "vp9/common/findnearmv.h"
#include "vp9/common/quant_common.h"
John Koleszar's avatar
John Koleszar committed
29 30
#include "encodemb.h"
#include "quantize.h"
31
#include "vp9/common/idct.h"
John Koleszar's avatar
John Koleszar committed
32 33
#include "variance.h"
#include "mcomp.h"
Yunqing Wang's avatar
Yunqing Wang committed
34
#include "rdopt.h"
Paul Wilkins's avatar
Paul Wilkins committed
35
#include "ratectrl.h"
John Koleszar's avatar
John Koleszar committed
36
#include "vpx_mem/vpx_mem.h"
37 38
#include "vp9/common/systemdependent.h"
#include "vp9/encoder/encodemv.h"
John Koleszar's avatar
John Koleszar committed
39

40 41 42
#include "vp9/common/seg_common.h"
#include "vp9/common/pred_common.h"
#include "vp9/common/entropy.h"
43
#include "vp9_rtcd.h"
44
#include "vp9/common/mvref_common.h"
45

John Koleszar's avatar
John Koleszar committed
46 47 48 49 50 51 52 53
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x)  (x)
#else
#define IF_RTCD(x)  NULL
#endif

#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

54 55
#define INVALID_MV 0x80008000

56 57 58
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
/* Table of 17 integer thresholds.  It is not referenced in the part of the
 * file visible here.
 * NOTE(review): judging by the name it is presumably indexed by encoder
 * speed level -- confirm against its users. */
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
John Koleszar's avatar
John Koleszar committed
77 78
};

79
#if CONFIG_PRED_FILTER
80
/* Table of candidate prediction modes (MAX_MODES entries) used when
 * CONFIG_PRED_FILTER is enabled.  Each initializer lists a prediction mode,
 * a reference frame and a second reference frame (NONE in the entries that
 * precede the compound sections), followed by a 0/1 value that only this
 * variant of the table carries.
 * NOTE(review): the fourth field is presumably the prediction-filter on/off
 * flag -- confirm against the MODE_DEFINITION declaration. */
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
  {ZEROMV,    LAST_FRAME,   NONE,  0},
  {ZEROMV,    LAST_FRAME,   NONE,  1},
  {DC_PRED,   INTRA_FRAME,  NONE,  0},

  {NEARESTMV, LAST_FRAME,   NONE,  0},
  {NEARESTMV, LAST_FRAME,   NONE,  1},
  {NEARMV,    LAST_FRAME,   NONE,  0},
  {NEARMV,    LAST_FRAME,   NONE,  1},

  {ZEROMV,    GOLDEN_FRAME, NONE,  0},
  {ZEROMV,    GOLDEN_FRAME, NONE,  1},
  {NEARESTMV, GOLDEN_FRAME, NONE,  0},
  {NEARESTMV, GOLDEN_FRAME, NONE,  1},

  {ZEROMV,    ALTREF_FRAME, NONE,  0},
  {ZEROMV,    ALTREF_FRAME, NONE,  1},
  {NEARESTMV, ALTREF_FRAME, NONE,  0},
  {NEARESTMV, ALTREF_FRAME, NONE,  1},

  {NEARMV,    GOLDEN_FRAME, NONE,  0},
  {NEARMV,    GOLDEN_FRAME, NONE,  1},
  {NEARMV,    ALTREF_FRAME, NONE,  0},
  {NEARMV,    ALTREF_FRAME, NONE,  1},

  {V_PRED,    INTRA_FRAME,  NONE,  0},
  {H_PRED,    INTRA_FRAME,  NONE,  0},
  {D45_PRED,  INTRA_FRAME,  NONE,  0},
  {D135_PRED, INTRA_FRAME,  NONE,  0},
  {D117_PRED, INTRA_FRAME,  NONE,  0},
  {D153_PRED, INTRA_FRAME,  NONE,  0},
  {D27_PRED,  INTRA_FRAME,  NONE,  0},
  {D63_PRED,  INTRA_FRAME,  NONE,  0},

  {TM_PRED,   INTRA_FRAME,  NONE,  0},

  {NEWMV,     LAST_FRAME,   NONE,  0},
  {NEWMV,     LAST_FRAME,   NONE,  1},
  {NEWMV,     GOLDEN_FRAME, NONE,  0},
  {NEWMV,     GOLDEN_FRAME, NONE,  1},
  {NEWMV,     ALTREF_FRAME, NONE,  0},
  {NEWMV,     ALTREF_FRAME, NONE,  1},

  {SPLITMV,   LAST_FRAME,   NONE,  0},
  {SPLITMV,   GOLDEN_FRAME, NONE,  0},
  {SPLITMV,   ALTREF_FRAME, NONE,  0},

  {B_PRED,    INTRA_FRAME,  NONE,  0},
  {I8X8_PRED, INTRA_FRAME,  NONE,  0},
John Koleszar's avatar
John Koleszar committed
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148

  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME, 0},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME, 0},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME, 0},

  {ZEROMV,    ALTREF_FRAME, LAST_FRAME,   0},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME,   0},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME,   0},

  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME, 0},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME, 0},

  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME, 0},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME,   0},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME, 0},

  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME, 0},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME,   0},
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME, 0},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME, 0},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME, 0},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME, 0},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME, 0},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME, 0},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME, 0},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME, 0},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME, 0},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME, 0},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME, 0},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME, 0},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME, 0},
#endif
John Koleszar's avatar
John Koleszar committed
168
};
169
#else
170
/* Table of candidate prediction modes (MAX_MODES entries) used when
 * CONFIG_PRED_FILTER is disabled.  Each initializer lists a prediction mode,
 * a reference frame and a second reference frame (NONE in the entries that
 * precede the compound sections).  The inter-intra entries at the end are
 * only compiled in when CONFIG_COMP_INTERINTRA_PRED is set. */
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
171 172
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
173

174 175
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
176

177 178
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
179

180 181
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
182

183 184
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
185

186 187 188 189 190 191 192 193
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
194

195
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
196

197 198 199
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
200

201 202 203
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
204

205 206
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
207

John Koleszar's avatar
John Koleszar committed
208 209 210 211
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
212

John Koleszar's avatar
John Koleszar committed
213 214 215
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
216

John Koleszar's avatar
John Koleszar committed
217 218 219
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
220

John Koleszar's avatar
John Koleszar committed
221 222 223
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
224

John Koleszar's avatar
John Koleszar committed
225 226
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
246
};
247
#endif
John Koleszar's avatar
John Koleszar committed
248 249

static void fill_token_costs(
250
  unsigned int (*c)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
251
  const vp9_prob(*p)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES],
John Koleszar's avatar
John Koleszar committed
252 253 254 255 256 257 258
  int block_type_counts) {
  int i, j, k;

  for (i = 0; i < block_type_counts; i++)
    for (j = 0; j < COEF_BANDS; j++)
      for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
        if (k == 0 && ((j > 0 && i > 0) || (j > 1 && i == 0)))
259 260
          vp9_cost_tokens_skip((int *)(c[i][j][k]),
                               p[i][j][k],
261
                               vp9_coef_tree);
John Koleszar's avatar
John Koleszar committed
262
        else
263 264
          vp9_cost_tokens((int *)(c[i][j][k]),
                          p[i][j][k],
265
                          vp9_coef_tree);
John Koleszar's avatar
John Koleszar committed
266
      }
John Koleszar's avatar
John Koleszar committed
267 268
}

269

270 271 272 273
/* Adjustment factors read by vp9_initialize_rd_consts() on second-pass
 * non-key frames.  The table is indexed by cpi->twopass.next_iiratio (values
 * above 31 use the last entry) and the selected factor f raises RDMULT by
 * (RDMULT * f) >> 4.  Only the first five entries are non-zero. */
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
274

275
// 3* dc_qlookup[Q]*dc_qlookup[Q];
276

277
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
278 279 280
/* Lookup tables with one entry per quantizer index.  They are filled by
 * vp9_init_me_luts() and copied into cpi->mb.sadperbit16 and
 * cpi->mb.sadperbit4 by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

281
/* Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
 * index in [0, QINDEX_RANGE).  Each entry is a linear function of the value
 * returned by vp9_convert_qindex_to_q(), truncated to int. */
void vp9_init_me_luts() {
  int qindex;

  // The tables are computed from a formula for now, which makes it easier
  // to judge the effect of experimental changes to the quantizer tables.
  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    sad_per_bit16lut[qindex] =
      (int)((0.0418 * vp9_convert_qindex_to_q(qindex)) + 2.4107);
    sad_per_bit4lut[qindex] =
      (int)((0.063 * vp9_convert_qindex_to_q(qindex)) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
293

294
/* Returns the base rate-distortion multiplier for qindex:
 * (11 * q * q) >> 6, where q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 6;
}

301
/* Copies the SAD-per-bit constants for quantizer index QIndex from the
 * lookup tables into cpi->mb.  QIndex is used directly as the table index
 * and is not range-checked here. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
  MACROBLOCK *const x = &cpi->mb;

  x->sadperbit16 = sad_per_bit16lut[QIndex];
  x->sadperbit4 = sad_per_bit4lut[QIndex];
}

306

307
/* Derives the rate-distortion constants for quantizer index QIndex and
 * stores them in cpi: RDMULT, RDDIV, mb.errorperbit, the per-mode
 * rd_threshes / rd_baseline_thresh arrays, the token cost tables for each
 * transform size, the mode costs and, on non-key frames, the motion vector
 * cost table.  QIndex is clamped to [0, MAXQ] before it is used.
 *
 * vp9_set_speed_features() is called part-way through, after errorperbit is
 * set and before the thresholds are computed from cpi->sf.thresh_mult. */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);

  cpi->RDMULT = compute_rd_mult(QIndex);

  // Extend rate multiplier along side quantizer zbin increases
  if (cpi->zbin_over_quant > 0) {
    // Experimental code using the same basic equation as used for Q above
    // The units of cpi->zbin_over_quant are 1/128 of Q bin size
    const double oq_factor =
      1.0 + ((double)0.0015625 * cpi->zbin_over_quant);

    cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
  }

  // Second pass, non-key frames: scale RDMULT up by rd_iifactor / 16.
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }

  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;

  // errorperbit is RDMULT / 110, but never less than 1.
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;

  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in a wider type: thresh_mult * q can exceed INT_MAX even
        // when the quotient after the division by 100 fits.  Results that
        // still do not fit are pinned to INT_MAX.
        const long long scaled =
          (long long)cpi->sf.thresh_mult[i] * q / 100;

        cpi->rd_threshes[i] = (scaled > INT_MAX) ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  // Token cost tables.  The casts name the same dimensions that
  // fill_token_costs() declares for its probability parameter.
  fill_token_costs(
    cpi->mb.token_costs[TX_4X4],
    (const vp9_prob(*)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
    cpi->common.fc.coef_probs,
    BLOCK_TYPES);
  fill_token_costs(
    cpi->mb.hybrid_token_costs[TX_4X4],
    (const vp9_prob(*)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
    cpi->common.fc.hybrid_coef_probs,
    BLOCK_TYPES);

  fill_token_costs(
    cpi->mb.token_costs[TX_8X8],
    (const vp9_prob(*)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
    cpi->common.fc.coef_probs_8x8,
    BLOCK_TYPES_8X8);
  fill_token_costs(
    cpi->mb.hybrid_token_costs[TX_8X8],
    (const vp9_prob(*)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
    cpi->common.fc.hybrid_coef_probs_8x8,
    BLOCK_TYPES_8X8);

  fill_token_costs(
    cpi->mb.token_costs[TX_16X16],
    (const vp9_prob(*)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
    cpi->common.fc.coef_probs_16x16,
    BLOCK_TYPES_16X16);
  fill_token_costs(
    cpi->mb.hybrid_token_costs[TX_16X16],
    (const vp9_prob(*)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
    cpi->common.fc.hybrid_coef_probs_16x16,
    BLOCK_TYPES_16X16);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

425
/* Returns the sum of squared differences between the first block_size
 * entries of coeff and dqcoeff.
 *
 * The sum is accumulated in a 64-bit type, because a single squared
 * difference of two 16-bit values can already exceed INT_MAX.  A total that
 * does not fit in an int is returned as INT_MAX.  A block_size of zero or
 * less yields 0. */
int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) {
  long long error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const long long this_diff = (long long)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return (error > INT_MAX) ? INT_MAX : (int)error;
}

436
/* Returns the summed squared difference between mb->block[i].coeff and
 * mb->e_mbd.block[i].dqcoeff over blocks 0..15.  Within each block the
 * coefficients at positions dc..15 are compared, so dc = 1 leaves the first
 * coefficient of every block out of the sum. */
int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) {
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    const BLOCK *const src = &mb->block[blk];
    const BLOCKD *const rec = &mb->e_mbd.block[blk];
    int block_sse = 0;
    int k;

    for (k = dc; k < 16; ++k) {
      const int diff = src->coeff[k] - rec->dqcoeff[k];
      block_sse += diff * diff;
    }

    total += block_sse;
  }

  return total;
}

459
/* Returns the summed squared coefficient error of blocks 16..23 of mb,
 * computed by calling vp9_block_error_c() on the 16 coefficients of each
 * block. */
int vp9_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int blk;

  for (blk = 16; blk < 24; ++blk) {
    total += vp9_block_error_c(mb->block[blk].coeff,
                               mb->e_mbd.block[blk].dqcoeff, 16);
  }

  return total;
}

475
/* Returns the combined SSE reported by the 8x8 variance functions for the
 * two chroma planes: the source pixels of blocks 16 and 20 are compared with
 * the u_buffer / v_buffer of x->e_mbd.pre at the position given by the
 * macroblock's first motion vector.
 *
 * The motion vector components are moved one step away from zero and halved
 * before use.  Their low three bits select the sub-pixel variance function;
 * when those bits are all zero the plain variance function is used. */
int vp9_uvsse(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned char *const u_src = (*(x->block[16].base_src) + x->block[16].src);
  unsigned char *const v_src = (*(x->block[20].base_src) + x->block[20].src);
  const int src_stride = x->block[16].src_stride;
  const int ref_stride = xd->block[16].pre_stride;
  int row = xd->mode_info_context->mbmi.mv[0].as_mv.row;
  int col = xd->mode_info_context->mbmi.mv[0].as_mv.col;
  unsigned char *u_ref, *v_ref;
  unsigned int u_sse = 0;
  unsigned int v_sse = 0;
  int ref_offset;

  /* Step away from zero, then halve. */
  row = (row + ((row < 0) ? -1 : 1)) / 2;
  col = (col + ((col < 0) ? -1 : 1)) / 2;

  ref_offset = (row >> 3) * ref_stride + (col >> 3);
  u_ref = xd->pre.u_buffer + ref_offset;
  v_ref = xd->pre.v_buffer + ref_offset;

  if ((row | col) & 7) {
    const int x_frac = (col & 7) << 1;
    const int y_frac = (row & 7) << 1;

    vp9_sub_pixel_variance8x8(u_ref, ref_stride, x_frac, y_frac,
                              u_src, src_stride, &u_sse);
    vp9_sub_pixel_variance8x8(v_ref, ref_stride, x_frac, y_frac,
                              v_src, src_stride, &v_sse);
  } else {
    vp9_variance8x8(u_ref, ref_stride, u_src, src_stride, &u_sse);
    vp9_variance8x8(v_ref, ref_stride, v_src, src_stride, &v_sse);
  }

  return u_sse + v_sse;
}

520
static int cost_coeffs_2x2(MACROBLOCK *mb,
521
                           BLOCKD *b, PLANE_TYPE type,
522
                           ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
523
  int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */
524 525 526 527 528
  int eob = b->eob;
  int pt;    /* surrounding block/prev coef predictor */
  int cost = 0;
  short *qcoeff_ptr = b->qcoeff;

529
  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
530 531 532
  assert(eob <= 4);

  for (; c < eob; c++) {
533 534 535 536 537
    int v = qcoeff_ptr[vp9_default_zig_zag1d[c]];
    int t = vp9_dct_value_tokens_ptr[v].Token;
    cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]][pt][t];
    cost += vp9_dct_value_cost_ptr[v];
    pt = vp9_prev_token_class[t];
538 539 540
  }

  if (c < 4)
541
    cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]]
542 543 544 545 546 547 548
            [pt] [DCT_EOB_TOKEN];

  pt = (c != !type); // is eob first coefficient;
  *a = *l = pt;
  return cost;
}

549
/* Returns the token cost of the quantized coefficients of block b for the
 * given plane type and transform size.
 *
 * tx_size (TX_4X4, TX_8X8 or TX_16X16) selects the scan order, the band
 * table and the default end-of-block position (16, 64 or 256).  For
 * PLANE_TYPE_Y_WITH_DC the transform type of the block is looked up; any
 * type other than DCT_DCT is costed from mb->hybrid_token_costs instead of
 * mb->token_costs, and for 4x4 blocks ADST_DCT / DCT_ADST also switch to the
 * row / column scan.
 *
 * Costing starts at coefficient 1 for PLANE_TYPE_Y_NO_DC and at 0 otherwise.
 * The initial context comes from *a and *l.  An end-of-block token cost is
 * added when the final position is below the segment's SEG_LVL_EOB value
 * (if that feature is active) or below the default eob.  On return *a and
 * *l both hold (final position != !type).
 *
 * An unsupported tx_size trips an assert in debug builds and otherwise
 * returns 0 without touching *a or *l, rather than reading uninitialized
 * scan/band pointers and indexing the cost tables out of range. */
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       int tx_size) {
  const int eob = b->eob;
  int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */
  int cost = 0, default_eob, seg_eob;
  int pt;                     /* surrounding block/prev coef predictor */
  int const *scan, *band;
  short *qcoeff_ptr = b->qcoeff;
  MACROBLOCKD *xd = &mb->e_mbd;
  MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi;
  TX_TYPE tx_type = DCT_DCT;
  int segment_id = mbmi->segment_id;

  switch (tx_size) {
    case TX_4X4:
      scan = vp9_default_zig_zag1d;
      band = vp9_coef_bands;
      default_eob = 16;
      if (type == PLANE_TYPE_Y_WITH_DC) {
        tx_type = get_tx_type_4x4(xd, b);
        /* Only the two mixed transforms change the scan order. */
        if (tx_type == ADST_DCT)
          scan = vp9_row_scan;
        else if (tx_type == DCT_ADST)
          scan = vp9_col_scan;
      }
      break;

    case TX_8X8:
      scan = vp9_default_zig_zag1d_8x8;
      band = vp9_coef_bands_8x8;
      default_eob = 64;
      if (type == PLANE_TYPE_Y_WITH_DC) {
        int ib = (int)(b - xd->block);
        if (ib < 16) {
          /* Map the block index to the block queried for the 8x8 type. */
          ib = (ib & 8) + ((ib & 4) >> 1);
          tx_type = get_tx_type_8x8(xd, xd->block + ib);
        }
      }
      break;

    case TX_16X16:
      scan = vp9_default_zig_zag1d_16x16;
      band = vp9_coef_bands_16x16;
      default_eob = 256;
      if (type == PLANE_TYPE_Y_WITH_DC) {
        tx_type = get_tx_type_16x16(xd, b);
      }
      break;

    default:
      /* No scan/band tables exist for any other size. */
      assert(0 && "cost_coeffs: unsupported tx_size");
      return 0;
  }

  if (vp9_segfeature_active(&mb->e_mbd, segment_id, SEG_LVL_EOB))
    seg_eob = vp9_get_segdata(&mb->e_mbd, segment_id, SEG_LVL_EOB);
  else
    seg_eob = default_eob;

  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);

  if (tx_type != DCT_DCT) {
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
      int t = vp9_dct_value_tokens_ptr[v].Token;
      cost += mb->hybrid_token_costs[tx_size][type][band[c]][pt][t];
      cost += vp9_dct_value_cost_ptr[v];
      pt = vp9_prev_token_class[t];
    }
    if (c < seg_eob)
      cost += mb->hybrid_token_costs[tx_size][type][band[c]]
          [pt][DCT_EOB_TOKEN];
  } else {
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
      int t = vp9_dct_value_tokens_ptr[v].Token;
      cost += mb->token_costs[tx_size][type][band[c]][pt][t];
      cost += vp9_dct_value_cost_ptr[v];
      pt = vp9_prev_token_class[t];
    }
    if (c < seg_eob)
      cost += mb->token_costs[tx_size][type][band[c]]
          [pt][DCT_EOB_TOKEN];
  }

  pt = (c != !type); // is eob first coefficient;
  *a = *l = pt;
  return cost;
}

649
/* Returns the token cost of the luma blocks of mb for the 4x4 transform:
 * blocks 0..15 costed as PLANE_TYPE_Y_NO_DC plus block 24 costed as
 * PLANE_TYPE_Y2.
 *
 * When backup is non-zero the costing runs on local copies of the above and
 * left entropy contexts; otherwise cost_coeffs() updates the contexts of
 * mb->e_mbd in place. */
static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;
  int total = 0;
  int blk;

  if (backup) {
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  for (blk = 0; blk < 16; ++blk) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_NO_DC,
                         ta + vp9_block2above[blk], tl + vp9_block2left[blk],
                         TX_4X4);
  }

  total += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
                       ta + vp9_block2above[24], tl + vp9_block2left[24],
                       TX_4X4);

  return total;
}

680 681 682
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
683
                                const VP9_ENCODER_RTCD *rtcd,
684
                                int *skippable, int backup) {
John Koleszar's avatar
John Koleszar committed
685
  int b;
Paul Wilkins's avatar
Paul Wilkins committed
686
  MACROBLOCKD *const xd = &mb->e_mbd;
John Koleszar's avatar
John Koleszar committed
687
  BLOCK   *const mb_y2 = mb->block + 24;
Paul Wilkins's avatar
Paul Wilkins committed
688
  BLOCKD *const x_y2  = xd->block + 24;
John Koleszar's avatar
John Koleszar committed
689 690 691
  short *Y2DCPtr = mb_y2->src_diff;
  BLOCK *beptr;
  int d;
692

John Koleszar's avatar
John Koleszar committed
693 694
  // Fdct and building the 2nd order block
  for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) {
695
    mb->vp9_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
John Koleszar's avatar
John Koleszar committed
696 697 698 699 700 701 702 703 704
    *Y2DCPtr++ = beptr->coeff[0];
    *Y2DCPtr++ = beptr->coeff[16];
  }

  // 2nd order fdct
  mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);

  // Quantization
  for (b = 0; b < 16; b++) {
705
    mb->quantize_b_4x4(&mb->block[b], &xd->block[b]);
John Koleszar's avatar
John Koleszar committed
706
  }
707

John Koleszar's avatar
John Koleszar committed
708
  // DC predication and Quantization of 2nd Order block
709
  mb->quantize_b_4x4(mb_y2, x_y2);
710

John Koleszar's avatar
John Koleszar committed
711
  // Distortion
712
  d = vp9_mbblock_error(mb, 1);
713

714
  d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
715

John Koleszar's avatar
John Koleszar committed
716 717
  *Distortion = (d >> 2);
  // rate
718
  *Rate = rdcost_mby_4x4(mb, backup);
719
  *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, 1);
720
}
John Koleszar's avatar
John Koleszar committed
721

722
/* Returns the token cost of the luma blocks of mb for the 8x8 transform:
 * blocks 0, 4, 8 and 12 costed as PLANE_TYPE_Y_NO_DC with TX_8X8, plus
 * block 24 costed as PLANE_TYPE_Y2 through cost_coeffs_2x2().
 *
 * When backup is non-zero the costing runs on local copies of the above and
 * left entropy contexts; otherwise the contexts of mb->e_mbd are updated in
 * place. */
static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
  int total = 0;
  int blk;

  if (backup) {
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  for (blk = 0; blk < 16; blk += 4) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_NO_DC,
                         ta + vp9_block2above_8x8[blk],
                         tl + vp9_block2left_8x8[blk],
                         TX_8X8);
  }

  total += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2,
                           ta + vp9_block2above[24], tl + vp9_block2left[24]);
  return total;
}

John Koleszar's avatar
John Koleszar committed
751 752 753
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
754
                                const VP9_ENCODER_RTCD *rtcd,
755
                                int *skippable, int backup) {
Paul Wilkins's avatar
Paul Wilkins committed
756
  MACROBLOCKD *const xd = &mb->e_mbd;
John Koleszar's avatar
John Koleszar committed
757
  BLOCK   *const mb_y2 = mb->block + 24;
Paul Wilkins's avatar
Paul Wilkins committed
758
  BLOCKD *const x_y2  = xd->block + 24;
John Koleszar's avatar
John Koleszar committed
759 760
  int d;

761 762
  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);
John Koleszar's avatar
John Koleszar committed
763 764 765 766 767 768

  /* remove 1st order dc to properly combine 1st/2nd order distortion */
  mb->coeff[0] = 0;
  mb->coeff[64] = 0;
  mb->coeff[128] = 0;
  mb->coeff[192] = 0;
Paul Wilkins's avatar
Paul Wilkins committed
769 770 771 772
  xd->dqcoeff[0] = 0;
  xd->dqcoeff[64] = 0;
  xd->dqcoeff[128] = 0;
  xd->dqcoeff[192] = 0;
John Koleszar's avatar
John Koleszar committed
773

774 775
  d = vp9_mbblock_error(mb, 0);
  d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
John Koleszar's avatar
John Koleszar committed
776 777 778

  *Distortion = (d >> 2);
  // rate
779
  *Rate = rdcost_mby_8x8(mb, backup);
780
  *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, 1);
781
}
782

783
/*
 * Coefficient cost of the luma plane for the 16x16 transform case: a single
 * cost_coeffs() call on the first block as PLANE_TYPE_Y_WITH_DC / TX_16X16.
 *
 * When `backup` is non-zero the call is given pointers to local copies of the
 * above/left entropy contexts; otherwise it is given the contexts referenced
 * by the macroblock itself.
 */
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *above = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *left = (ENTROPY_CONTEXT *)xd->left_context;

  if (backup) {
    // Work on scratch copies instead of the live contexts.
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    above = (ENTROPY_CONTEXT *)&above_copy;
    left = (ENTROPY_CONTEXT *)&left_copy;
  }

  return cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, above, left,
                     TX_16X16);
}
803

Daniel Kang's avatar
Daniel Kang committed
804
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
805 806
                                  const VP9_ENCODER_RTCD *rtcd, int *skippable,
                                  int backup) {
Daniel Kang's avatar
Daniel Kang committed
807
  int d;
Deb Mukherjee's avatar
Deb Mukherjee committed
808 809 810 811
  MACROBLOCKD *xd = &mb->e_mbd;
  BLOCKD *b  = &mb->e_mbd.block[0];
  BLOCK  *be = &mb->block[0];
  TX_TYPE tx_type;
Daniel Kang's avatar
Daniel Kang committed
812

Deb Mukherjee's avatar
Deb Mukherjee committed
813 814
  tx_type = get_tx_type_16x16(xd, b);
  if (tx_type != DCT_DCT) {
815
    vp9_fht(be->src_diff, 32, be->coeff, tx_type, 16);
816
  } else
817
    vp9_transform_mby_16x16(mb);
818

819
  vp9_quantize_mby_16x16(mb);
820 821 822
  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
823
  if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
824
    vp9_optimize_mby_16x16(mb, rtcd);
825

826
  d = vp9_mbblock_error(mb, 0);
Daniel Kang's avatar
Daniel Kang committed
827 828 829

  *Distortion = (d >> 2);
  // rate
830
  *Rate = rdcost_mby_16x16(mb, backup);
831
  *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd);
Daniel Kang's avatar
Daniel Kang committed
832 833
}

834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866
/*
 * Picks the transform size for the current macroblock from per-size
 * rate (r), distortion (d) and skip (s) figures, and fills txfm_cache with the
 * RD cost obtained under each transform mode.
 *
 * On entry r[0][n] holds the rate for transform size n; this function fills
 * r[1][n] with the same rate plus the cost of signalling size n using
 * cm->prob_tx.  rd[0][n] / rd[1][n] are the RD costs built from r[0] / r[1].
 *
 * Outputs:
 *   mbmi->txfm_size - forced by cm->txfm_mode, or the rd[1] minimum when the
 *                     mode is TX_MODE_SELECT
 *   *rate, *distortion, *skip - the r/d/s entries of the chosen size (r[1] is
 *                     used for the rate only under TX_MODE_SELECT)
 *   txfm_cache[]    - rd[0] of each fixed size, and the best rd[1] for
 *                     TX_MODE_SELECT
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int r[2][TX_SIZE_MAX], int *rate,
                                     int d[TX_SIZE_MAX], int *distortion,
                                     int s[TX_SIZE_MAX], int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES]) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
  int64_t rd[2][TX_SIZE_MAX];
  int best_is_16x16;
  int prefer_8x8_over_4x4;
  int tx;

  // Rates including the transform-size signalling cost.
  r[1][TX_4X4]   = r[0][TX_4X4] + vp9_cost_zero(cm->prob_tx[0]);
  r[1][TX_8X8]   = r[0][TX_8X8] + vp9_cost_one(cm->prob_tx[0]) +
                   vp9_cost_zero(cm->prob_tx[1]);
  r[1][TX_16X16] = r[0][TX_16X16] + vp9_cost_one(cm->prob_tx[0]) +
                   vp9_cost_one(cm->prob_tx[1]);

  if (!cm->mb_no_coeff_skip) {
    for (tx = TX_4X4; tx <= TX_16X16; tx++) {
      rd[0][tx] = RDCOST(x->rdmult, x->rddiv, r[0][tx], d[tx]);
      rd[1][tx] = RDCOST(x->rdmult, x->rddiv, r[1][tx], d[tx]);
    }
  } else {
    int cost_coded, cost_skipped;

    assert(skip_prob > 0);
    cost_coded = vp9_cost_bit(skip_prob, 0);
    cost_skipped = vp9_cost_bit(skip_prob, 1);

    for (tx = TX_4X4; tx <= TX_16X16; tx++) {
      if (!s[tx]) {
        rd[0][tx] = RDCOST(x->rdmult, x->rddiv, r[0][tx] + cost_coded, d[tx]);
        rd[1][tx] = RDCOST(x->rdmult, x->rddiv, r[1][tx] + cost_coded, d[tx]);
      } else {
        // Skippable: only the skip flag is paid for, with or without
        // transform-size signalling.
        rd[1][tx] = RDCOST(x->rdmult, x->rddiv, cost_skipped, d[tx]);
        rd[0][tx] = rd[1][tx];
      }
    }
  }

  best_is_16x16 = rd[1][TX_16X16] < rd[1][TX_8X8] &&
                  rd[1][TX_16X16] < rd[1][TX_4X4];
  prefer_8x8_over_4x4 = rd[1][TX_8X8] < rd[1][TX_4X4];

  if (cm->txfm_mode == ALLOW_16X16 ||
      (cm->txfm_mode == TX_MODE_SELECT && best_is_16x16)) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             (cm->txfm_mode == TX_MODE_SELECT && prefer_8x8_over_4x4)) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 ||
           (cm->txfm_mode == TX_MODE_SELECT && !prefer_8x8_over_4x4));
    mbmi->txfm_size = TX_4X4;
  }

  *rate       = r[cm->txfm_mode == TX_MODE_SELECT][mbmi->txfm_size];
  *distortion = d[mbmi->txfm_size];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[0][TX_4X4];
  txfm_cache[ALLOW_8X8] = rd[0][TX_8X8];
  txfm_cache[ALLOW_16X16] = rd[0][TX_16X16];
  if (best_is_16x16) {
    txfm_cache[TX_MODE_SELECT] = rd[1][TX_16X16];
  } else if (rd[1][TX_4X4] < rd[1][TX_8X8]) {
    txfm_cache[TX_MODE_SELECT] = rd[1][TX_4X4];
  } else {
    txfm_cache[TX_MODE_SELECT] = rd[1][TX_8X8];
  }
}

/*
 * Luma rate-distortion evaluation for one macroblock across all three
 * transform sizes.
 *
 * Computes the luma residual with vp9_subtract_mby(), evaluates it with the
 * 16x16, 8x8 and 4x4 transform paths (each with backup = 1, i.e. entropy
 * contexts are costed on copies), then lets choose_txfm_size_from_rd() pick
 * the transform size and fill *rate, *distortion, *skippable and txfm_cache.
 */
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX];

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);

  // Only row 0 of r is filled here; choose_txfm_size_from_rd() derives row 1.
  macro_block_yrd_16x16(x, &r[0][TX_16X16], &d[TX_16X16],
                        IF_RTCD(&cpi->rtcd), &s[TX_16X16], 1);
  macro_block_yrd_8x8(x, &r[0][TX_8X8], &d[TX_8X8],
                      IF_RTCD(&cpi->rtcd), &s[TX_8X8], 1);
  macro_block_yrd_4x4(x, &r[0][TX_4X4], &d[TX_4X4],
                      IF_RTCD(&cpi->rtcd), &s[TX_4X4], 1);

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
                           txfm_cache);
}

John Koleszar's avatar
John Koleszar committed
923 924 925 926 927 928 929
/*
 * Copies a 4x4 block of bytes from `predictor` to `dst`.  Both buffers use a
 * row stride of 16 bytes; only the first 4 bytes of each of the 4 rows are
 * written, every other byte of `dst` is left untouched.
 *
 * The copy is done byte-wise on purpose: reading and writing the
 * unsigned char buffers through an `unsigned int *` violates C's effective
 * type (strict aliasing) rule, is undefined for pointers that are not
 * int-aligned, and silently depends on sizeof(unsigned int) being 4.
 */
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
  int row, col;

  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      dst[row * 16 + col] = predictor[row * 16 + col];
    }
  }
}
931

Ronald S. Bultje's avatar
Ronald S. Bultje committed
932
#if CONFIG_SUPERBLOCKS
933 934 935 936
/*
 * Luma rate-distortion evaluation for a 32x32 superblock, done as four 16x16
 * macroblocks (2 columns x 2 rows) and accumulated per transform size.
 *
 * For every transform size a private pair of above/left entropy context
 * arrays is seeded from xd's current contexts.  While a macroblock is being
 * evaluated, xd->above_context / xd->left_context are temporarily pointed at
 * the entry of that private array matching the macroblock's column / row;
 * the original pointers are restored before returning.
 *
 * Rates and distortions are summed over the four macroblocks; a size is
 * flagged skippable only if all four macroblocks were.  The final choice and
 * the outputs (*rate, *distortion, *skip, txfm_cache) come from
 * choose_txfm_size_from_rd().
 *
 * `rtcd` is accepted but not referenced; the helpers are passed
 * IF_RTCD(&cpi->rtcd) instead.
 */
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
                            const VP9_ENCODER_RTCD *rtcd, int *skip,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES *const saved_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES *const saved_left = xd->left_context;
  ENTROPY_CONTEXT_PLANES above_ctx[3][2];
  ENTROPY_CONTEXT_PLANES left_ctx[3][2];
  const uint8_t *src = x->src.y_buffer;
  const uint8_t *dst = xd->dst.y_buffer;
  const int src_stride = x->src.y_stride;
  const int dst_stride = xd->dst.y_stride;
  int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX];
  int tx, mb_idx;

  // Seed the per-size context copies and clear the accumulators.
  for (tx = TX_4X4; tx <= TX_16X16; tx++) {
    vpx_memcpy(above_ctx[tx], xd->above_context, sizeof(above_ctx[tx]));
    vpx_memcpy(left_ctx[tx], xd->left_context, sizeof(left_ctx[tx]));
    r[0][tx] = 0;
    d[tx] = 0;
    s[tx] = 1;
  }

  for (mb_idx = 0; mb_idx < 4; mb_idx++) {
    const int col = mb_idx & 1;
    const int row = mb_idx >> 1;
    int mb_rate, mb_dist, mb_skip;

    vp9_subtract_mby_s_c(x->src_diff,
                         src + col * 16 + row * 16 * src_stride,
                         src_stride,
                         dst + col * 16 + row * 16 * dst_stride,
                         dst_stride);

    // 16x16
    xd->above_context = &above_ctx[TX_16X16][col];
    xd->left_context = &left_ctx[TX_16X16][row];
    macro_block_yrd_16x16(x, &mb_rate, &mb_dist, IF_RTCD(&cpi->rtcd),
                          &mb_skip, 0);
    r[0][TX_16X16] += mb_rate;
    d[TX_16X16] += mb_dist;
    s[TX_16X16] = s[TX_16X16] && mb_skip;

    // 4x4
    xd->above_context = &above_ctx[TX_4X4][col];
    xd->left_context = &left_ctx[TX_4X4][row];
    macro_block_yrd_4x4(x, &mb_rate, &mb_dist, IF_RTCD(&cpi->rtcd),
                        &mb_skip, 0);
    r[0][TX_4X4] += mb_rate;
    d[TX_4X4] += mb_dist;
    s[TX_4X4] = s[TX_4X4] && mb_skip;

    // 8x8
    xd->above_context = &above_ctx[TX_8X8][col];
    xd->left_context = &left_ctx[TX_8X8][row];
    macro_block_yrd_8x8(x, &mb_rate, &mb_dist, IF_RTCD(&cpi->rtcd),
                        &mb_skip, 0);
    r[0][TX_8X8] += mb_rate;
    d[TX_8X8] += mb_dist;
    s[TX_8X8] = s[TX_8X8] && mb_skip;
  }

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache);

  // Put the caller's context pointers back.
  xd->above_context = saved_above;
  xd->left_context = saved_left;
}
#endif

John Koleszar's avatar
John Koleszar committed
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009
/*
 * Copies an 8x8 block of bytes from `predictor` to `dst`.  Both buffers use a
 * row stride of 16 bytes; only the first 8 bytes of each of the 8 rows are
 * written, every other byte of `dst` is left untouched.
 *
 * The copy is done byte-wise on purpose: reading and writing the
 * unsigned char buffers through an `unsigned int *` violates C's effective
 * type (strict aliasing) rule, is undefined for pointers that are not
 * int-aligned, and silently depends on sizeof(unsigned int) being 4.
 */
static void copy_predictor_8x8(unsigned char *dst, const unsigned char *predictor) {
  int row, col;

  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      dst[row * 16 + col] = predictor[row * 16 + col];
    }
  }
}

1012
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
1013
                                     BLOCKD *b, B_PREDICTION_MODE *best_mode,
1014
#if CONFIG_COMP_INTRA_PRED
1015 1016 1017 1018 1019 1020 1021
                                     B_PREDICTION_MODE *best_second_mode,
                                     int allow_comp,
#endif
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
1022
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
1023
  MACROBLOCKD *xd = &x->e_mbd;
Jingning Han's avatar
Jingning Han committed
1024

1025
#if CONFIG_COMP_INTRA_PRED
John Koleszar's avatar
John Koleszar committed
1026 1027
  B_PREDICTION_MODE mode2;
#endif
1028
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
1029 1030 1031 1032 1033
  int rate = 0;
  int distortion;

  ENTROPY_CONTEXT ta = *a, tempa = *a;
  ENTROPY_CONTEXT tl = *l, templ = *l;
Deb Mukherjee's avatar
Deb Mukherjee committed
1034 1035
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
John Koleszar's avatar
John Koleszar committed
1036 1037 1038 1039 1040 1041 1042 1043
  /*
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
   * a temp buffer that meets the stride requirements, but we are only
   * interested in the left 4x4 block
   * */
  DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16 * 4);
  DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);

1044 1045 1046 1047
#if CONFIG_NEWBINTRAMODES
  b->bmi.as_mode.context = vp9_find_bpred_context(b);
#endif
  for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
John Koleszar's avatar
John Koleszar committed
1048
#if CONFIG_COMP_INTRA_PRED
Jingning Han's avatar
Jingning Han committed
1049 1050
    for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1));
                   mode2 != (allow_comp ? (mode + 1) : 0); mode2++) {
1051
#endif
1052
      int64_t this_rd;
John Koleszar's avatar
John Koleszar committed
1053
      int ratey;
John Koleszar's avatar
John Koleszar committed
1054

1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
#if CONFIG_NEWBINTRAMODES
      if (xd->frame_type == KEY_FRAME) {
        if (mode == B_CONTEXT_PRED) continue;
#if CONFIG_COMP_INTRA_PRED
        if (mode2 == B_CONTEXT_PRED) continue;
#endif
      } else {
        if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
            mode < B_CONTEXT_PRED)
          continue;
#if CONFIG_COMP_INTRA_PRED
        if (mode2 >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
            mode2 < B_CONTEXT_PRED)
          continue;
#endif
      }
#endif

1073
      b->bmi.as_mode.first = mode;
1074 1075 1076 1077
#if CONFIG_NEWBINTRAMODES
      rate = bmode_costs[
          mode == B_CONTEXT_PRED ? mode - CONTEXT_PRED_REPLACEMENTS : mode];
#else
John Koleszar's avatar
John Koleszar committed
1078
      rate = bmode_costs[mode];
1079
#endif
1080

1081
#if CONFIG_COMP_INTRA_PRED
John Koleszar's avatar
John Koleszar committed
1082
      if (mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
1083
#endif
1084
        vp9_intra4x4_predict(b, mode, b->predictor);
1085
#if CONFIG_COMP_INTRA_PRED
John Koleszar's avatar
John Koleszar committed
1086
      } else {
1087
        vp9_comp_intra4x4_predict(b, mode, mode2, b->predictor);
1088 1089 1090 1091 1092
#if CONFIG_NEWBINTRAMODES
        rate += bmode_costs[
            mode2 == B_CONTEXT_PRED ?
            mode2 - CONTEXT_PRED_REPLACEMENTS : mode2];
#else
John Koleszar's avatar
John Koleszar committed
1093
        rate += bmode_costs[mode2];
1094
#endif
John Koleszar's avatar
John Koleszar committed
1095 1096
      }
#endif
1097
      vp9_subtract_b(be, b, 16);
John Koleszar's avatar
John Koleszar committed
1098

Deb Mukherjee's avatar
Deb Mukherjee committed
1099 1100 1101
      b->bmi.as_mode.first = mode;
      tx_type = get_tx_type_4x4(xd, b);
      if (tx_type != DCT_DCT) {
1102
        vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
1103
        vp9_ht_quantize_b_4x4(be, b, tx_type);
Jingning Han's avatar
Jingning Han committed
1104
      } else {
1105
        x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
1106
        x->quantize_b_4x4(be, b);
Jingning Han's avatar
Jingning Han committed
1107
      }
John Koleszar's avatar
John Koleszar committed
1108

Deb Mukherjee's avatar
Deb Mukherjee committed
1109 1110
      tempa = ta;
      templ = tl;
John Koleszar's avatar
John Koleszar committed
1111

Deb Mukherjee's avatar
Deb Mukherjee committed
1112 1113
      ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
      rate += ratey;
1114
      distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;
Jingning Han's avatar
Jingning Han committed
1115

Deb Mukherjee's avatar
Deb Mukherjee committed
1116
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
1117

Deb Mukherjee's avatar
Deb Mukherjee committed
1118 1119 1120 1121 1122 1123 1124
      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        best_tx_type = tx_type;
Jingning Han's avatar
Jingning Han committed
1125

1126
#if CONFIG_COMP_INTRA_PRED
Deb Mukherjee's avatar
Deb Mukherjee committed
1127
        *best_second_mode = mode2;
1128
#endif
Deb Mukherjee's avatar
Deb Mukherjee committed
1129 1130 1131 1132 1133
        *a = tempa;
        *l = templ;
        copy_predictor(best_predictor, b->predictor);
        vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
      }
Jingning Han's avatar
Jingning Han committed
1134
#if CONFIG_COMP_INTRA_PRED
John Koleszar's avatar
John Koleszar committed
1135
    }
Jingning Han's avatar
Jingning Han committed
1136
#endif
John Koleszar's avatar
John Koleszar committed
1137 1138
  }
  b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);
1139
#if CONFIG_COMP_INTRA_PRED
John Koleszar's avatar
John Koleszar committed
1140
  b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode);
1141
#endif
1142

Jingning Han's avatar
Jingning Han committed
1143
  // inverse transform
Deb Mukherjee's avatar
Deb Mukherjee committed
1144
  if (best_tx_type != DCT_DCT)
1145
    vp9_ihtllm_c(best_dqcoeff, b->diff, 32, best_tx_type, 4);
1146
  else
Deb Mukherjee's avatar
Deb Mukherjee committed
1147 1148
    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
        best_dqcoeff, b->diff, 32);
Jingning Han's avatar
Jingning Han committed
1149

1150
  vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
John Koleszar's avatar
John Koleszar committed
1151

John Koleszar's avatar
John Koleszar committed
1152
  return best_rd;
John Koleszar's avatar
John Koleszar committed
1153 1154
}

1155
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate,
1156
                                     int *rate_y, int *Distortion, int64_t best_rd,
1157 1158 1159
#if CONFIG_COMP_INTRA_PRED
                                     int allow_comp,
#endif
John Koleszar's avatar
John Koleszar committed
1160 1161 1162 1163 1164 1165 1166 1167
                                     int update_contexts) {
  int i;
  MACROBLOCKD *const xd = &mb->e_mbd;
  int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
1168
  ENTROPY_CONTEXT *ta, *tl;
John Koleszar's avatar
John Koleszar committed
1169 1170 1171
  int *bmode_costs;

  if (update_contexts) {
Paul Wilkins's avatar
Paul Wilkins committed
1172 1173
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;