/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx/vpx_codec.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"

// Descriptor for one prediction buffer: a byte pointer, a stride and a
// usage flag.
// NOTE(review): the code that fills in and consumes these fields is outside
// this section; confirm the exact semantics there.
typedef struct {
  uint8_t *data;  // first byte of the buffer
  int stride;     // presumably the row pitch of `data` -- verify against users
  int in_use;     // presumably non-zero while the buffer is claimed -- verify
} PRED_BUFFER;

// 4x4 table of integer offsets, values 9..24, each used exactly once.
// NOTE(review): the code that indexes this table is outside this section;
// the name suggests a per-16x16 position shift -- confirm against its users.
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};

static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm, const MACROBLOCK *x,
                      const MACROBLOCKD *xd, const TileInfo *const tile,
50
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
clang-format's avatar
clang-format committed
51 52
                      int_mv *mv_ref_list, int_mv *base_mv, int mi_row,
                      int mi_col, int use_base_mv) {
53 54 55
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

Scott LaVarnway's avatar
Scott LaVarnway committed
56
  const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type];
57 58 59 60 61 62

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
James Zern's avatar
James Zern committed
63
  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
64 65 66 67 68 69 70

  // The nearest 2 blocks are treated differently
  // if the size < 8x8 we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
clang-format's avatar
clang-format committed
71 72
      const MODE_INFO *const candidate_mi =
          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
73
      // Keep counts for entropy encoding.
Scott LaVarnway's avatar
Scott LaVarnway committed
74
      context_counter += mode_2_counter[candidate_mi->mode];
75 76
      different_ref_found = 1;

Scott LaVarnway's avatar
Scott LaVarnway committed
77
      if (candidate_mi->ref_frame[0] == ref_frame)
78 79
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
                        refmv_count, mv_ref_list, Done);
80 81 82 83 84 85 86 87 88 89 90
    }
  }

  const_motion = 1;

  // Check the rest of the neighbors in much the same way
  // as before except we don't need to keep track of sub blocks or
  // mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
clang-format's avatar
clang-format committed
91 92
      const MODE_INFO *const candidate_mi =
          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
93 94
      different_ref_found = 1;

Scott LaVarnway's avatar
Scott LaVarnway committed
95 96
      if (candidate_mi->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done);
97 98 99 100 101 102 103 104 105 106
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame
  // go back through the neighbors and find motion vectors from
  // different reference frames.
  if (different_ref_found && !refmv_count) {
    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
      const POSITION *mv_ref = &mv_ref_search[i];
      if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
clang-format's avatar
clang-format committed
107 108
        const MODE_INFO *const candidate_mi =
            xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
109 110

        // If the candidate is INTRA we don't want to consider its mv.
Scott LaVarnway's avatar
Scott LaVarnway committed
111
        IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias,
112
                                 refmv_count, mv_ref_list, Done);
113 114 115
      }
    }
  }
116 117 118 119 120
  if (use_base_mv &&
      !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
      ref_frame == LAST_FRAME) {
    // Get base layer mv.
    MV_REF *candidate =
clang-format's avatar
clang-format committed
121 122
        &cm->prev_frame
             ->mvs[(mi_col >> 1) + (mi_row >> 1) * (cm->mi_cols >> 1)];
123
    if (candidate->mv[0].as_int != INVALID_MV) {
clang-format's avatar
clang-format committed
124 125
      base_mv->as_mv.row = (candidate->mv[0].as_mv.row * 2);
      base_mv->as_mv.col = (candidate->mv[0].as_mv.col * 2);
126 127 128 129 130
      clamp_mv_ref(&base_mv->as_mv, xd);
    } else {
      base_mv->as_int = INVALID_MV;
    }
  }
131

clang-format's avatar
clang-format committed
132
Done:
133

134
  x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter];
135 136 137 138 139 140 141 142

  // Clamp vectors
  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);

  return const_motion;
}

// Runs a full-pixel motion search for the block's first reference frame and,
// if the result is still competitive, refines it to sub-pixel precision.
//
//   tmp_mv        in/out: when use_base_mv is non-zero its incoming value is
//                 used as the search's centre mv; on return it holds the
//                 motion vector found.
//   rate_mv       out: mv bit cost from vp9_mv_bit_cost() relative to the
//                 first reference mv candidate.
//   best_rd_sofar RD cost to beat; compared against the cost of the mv bits
//                 plus the NEWMV mode bits at zero distortion.
//   use_base_mv   selects the centre mv (see tmp_mv) and, together with
//                 cpi->sf.base_mv_aggressive, forces subpel_force_stop to 2.
//
// Returns 1 when the rate-only RD cost did not exceed best_rd_sofar (the
// sub-pixel refinement then ran), 0 otherwise.
//
// x->mv_limits and, for a scaled reference, xd->plane[].pre[0] are modified
// temporarily and restored before returning.
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar, int use_base_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  const int step_param = cpi->sf.mv.fullpel_search_step_param;
  const int sadpb = x->sadperbit16;
  MV mvp_full;
  const int ref = mi->ref_frame[0];
  const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  MV center_mv;
  uint32_t dis;
  int rate_mode;
  const MvLimits tmp_mv_limits = x->mv_limits;
  int rv = 0;
  int cost_list[5];
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      vp9_get_scaled_ref_frame(cpi, ref);
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }
  vp9_set_mv_search_range(&x->mv_limits, &ref_mv);

  // Index 0/1 picks one of the two reference mv candidates; index 2 picks
  // the stored predicted mv for this reference frame.
  assert(x->mv_best_ref_index[ref] <= 2);
  if (x->mv_best_ref_index[ref] < 2)
    mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  else
    mvp_full = x->pred_mv[ref];

  // Drop the three fractional bits to get the full-pixel start position.
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  if (!use_base_mv)
    center_mv = ref_mv;
  else
    center_mv = tmp_mv->as_mv;

  vp9_full_pixel_search(
      cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
      cond_cost_list(cpi, cost_list), &center_mv, &tmp_mv->as_mv, INT_MAX, 0);

  x->mv_limits = tmp_mv_limits;

  // calculate the bit cost on motion vector
  mvp_full.row = tmp_mv->as_mv.row * 8;
  mvp_full.col = tmp_mv->as_mv.col * 8;

  *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, x->nmvjointcost, x->mvcost,
                             MV_COST_WEIGHT);

  rate_mode =
      cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]][INTER_OFFSET(NEWMV)];
  // Rate-only check: distortion is passed as 0, so this is a lower bound on
  // the RD cost of choosing this motion vector.
  rv =
      !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) > best_rd_sofar);

  if (rv) {
    const int subpel_force_stop =
        (use_base_mv && cpi->sf.base_mv_aggressive)
            ? 2
            : cpi->sf.mv.subpel_force_stop;
    cpi->find_fractional_mv_step(
        x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
        x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
        cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
    // Re-cost the refined vector.
    *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
                               x->mvcost, MV_COST_WEIGHT);
  }

  if (scaled_ref_frame) {
    int i;
    // Put back the unscaled reference planes saved above.
    for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  }
  return rv;
}

// Walks a w x h area in steps of block_size and, for each tile in raster
// order, stores the tile's SSE, sum and variance between src and ref into
// sse8x8[k], sum8x8[k] and var8x8[k]. *sse and *sum receive the totals over
// all tiles.
//
// The per-tile kernel is always an 8x8 one (vpx_get8x8var or its high
// bit-depth counterparts) and the variance uses a shift of 6 (64 samples),
// so block_size is expected to be 8. The output arrays must hold one entry
// per tile, i.e. (w / block_size) * (h / block_size) entries.
//
// With CONFIG_VP9_HIGHBITDEPTH, use_highbitdepth selects the high bit-depth
// kernels and bd picks the 8/10/12-bit variant.
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride, int w, int h,
                           unsigned int *sse, int *sum, int block_size,
#if CONFIG_VP9_HIGHBITDEPTH
                           int use_highbitdepth, vpx_bit_depth_t bd,
#endif
                           uint32_t *sse8x8, int *sum8x8, uint32_t *var8x8) {
  int i, j, k = 0;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += block_size) {
    for (j = 0; j < w; j += block_size) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (use_highbitdepth) {
        switch (bd) {
          case VPX_BITS_8:
            vpx_highbd_8_get8x8var(src + src_stride * i + j, src_stride,
                                   ref + ref_stride * i + j, ref_stride,
                                   &sse8x8[k], &sum8x8[k]);
            break;
          case VPX_BITS_10:
            vpx_highbd_10_get8x8var(src + src_stride * i + j, src_stride,
                                    ref + ref_stride * i + j, ref_stride,
                                    &sse8x8[k], &sum8x8[k]);
            break;
          case VPX_BITS_12:
            vpx_highbd_12_get8x8var(src + src_stride * i + j, src_stride,
                                    ref + ref_stride * i + j, ref_stride,
                                    &sse8x8[k], &sum8x8[k]);
            break;
          // An unexpected bit depth would otherwise leave sse8x8[k] and
          // sum8x8[k] unwritten; flag it in debug builds.
          default: assert(0); break;
        }
      } else {
        vpx_get8x8var(src + src_stride * i + j, src_stride,
                      ref + ref_stride * i + j, ref_stride, &sse8x8[k],
                      &sum8x8[k]);
      }
#else
      vpx_get8x8var(src + src_stride * i + j, src_stride,
                    ref + ref_stride * i + j, ref_stride, &sse8x8[k],
                    &sum8x8[k]);
#endif
      *sse += sse8x8[k];
      *sum += sum8x8[k];
      // variance = sse - sum^2 / 64
      var8x8[k] = sse8x8[k] - (uint32_t)(((int64_t)sum8x8[k] * sum8x8[k]) >> 6);
      k++;
    }
  }
}

// Merges each 2x2 group of input statistics into one output entry.
//
// The input is read as an nw x nh raster grid of (sse_i, sum_i) pairs, where
// nw/nh are derived from bw/bh (log2 sizes, same units as
// b_width_log2_lookup / b_height_log2_lookup) and the block size that
// corresponds to tx_size. For every 2x2 group, in raster order, the four
// SSEs and four sums are added into sse_o[k] / sum_o[k], and
//   var_o[k] = sse_o[k] - (sum_o[k]^2 >> shift)
// with shift = log2-width + log2-height of the unit block + 6.
//
// The loops read row i + 1 and column j + 1, so nw and nh are expected to be
// even; the output arrays need (nw / 2) * (nh / 2) entries.
static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
                               unsigned int *sse_i, int *sum_i,
                               unsigned int *var_o, unsigned int *sse_o,
                               int *sum_o) {
  const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  // Loop-invariant shift applied to the squared sum.
  const int shift = b_width_log2_lookup[unit_size] +
                    b_height_log2_lookup[unit_size] + 6;
  int i, j, k = 0;

  for (i = 0; i < nh; i += 2) {
    for (j = 0; j < nw; j += 2) {
      sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
                 sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
      sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
                 sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
      var_o[k] =
          sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >> shift);
      k++;
    }
  }
}

// Model-based rate/distortion estimate for the luma plane that also tests,
// per transform block, whether the residual can be skipped.
//
// Outputs:
//   *var_y, *sse_y   variance and SSE of the whole block (source vs. the
//                    current contents of the destination buffer).
//   *out_rate_sum,
//   *out_dist_sum    modelled rate and distortion.
//   *early_term      set to 1 only when luma is fully skippable (AC and DC)
//                    and both chroma planes pass the same test; otherwise it
//                    is left untouched.
// Side effects: writes xd->mi[0]->tx_size and x->skip_txfm[0]; on the fully
// skippable path it builds the inter predictor for chroma planes (plane 2
// only if plane 1 passed its test).
//
// The transform size is at least TX_8X8 (asserted). In TX_MODE_SELECT it is
// also capped at TX_16X16.
static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                    MACROBLOCK *x, MACROBLOCKD *xd,
                                    int *out_rate_sum, int64_t *out_dist_sum,
                                    unsigned int *var_y, unsigned int *sse_y,
                                    int mi_row, int mi_col, int *early_term) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  const int64_t dc_thr = dc_quant * dc_quant >> 6;
  const int64_t ac_thr = ac_quant * ac_quant >> 6;
  unsigned int var;
  int sum;
  int skip_dc = 0;

  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  const int num8x8 = 1 << (bw + bh - 2);
  unsigned int sse8x8[64] = { 0 };
  int sum8x8[64] = { 0 };
  unsigned int var8x8[64] = { 0 };
  TX_SIZE tx_size;
  int i, k;
#if CONFIG_VP9_HIGHBITDEPTH
  const vpx_bit_depth_t bd = cpi->common.bit_depth;
#endif
  // Calculate variance for whole partition, and also save 8x8 blocks' variance
  // to be used in following transform skipping test.
  block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                 4 << bw, 4 << bh, &sse, &sum, 8,
#if CONFIG_VP9_HIGHBITDEPTH
                 cpi->common.use_highbitdepth, bd,
#endif
                 sse8x8, sum8x8, var8x8);
  var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      tx_size = VPXMIN(max_txsize_lookup[bsize],
                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
      tx_size = TX_8X8;
    else if (tx_size > TX_16X16)
      tx_size = TX_16X16;
  } else {
    tx_size = VPXMIN(max_txsize_lookup[bsize],
                     tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  assert(tx_size >= TX_8X8);
  xd->mi[0]->tx_size = tx_size;

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    unsigned int sse16x16[16] = { 0 };
    int sum16x16[16] = { 0 };
    unsigned int var16x16[16] = { 0 };
    const int num16x16 = num8x8 >> 2;

    unsigned int sse32x32[4] = { 0 };
    int sum32x32[4] = { 0 };
    unsigned int var32x32[4] = { 0 };
    const int num32x32 = num8x8 >> 4;

    int ac_test = 1;
    int dc_test = 1;
    // Pick the statistics that match the chosen transform size.
    const int num = (tx_size == TX_8X8)
                        ? num8x8
                        : ((tx_size == TX_16X16) ? num16x16 : num32x32);
    const unsigned int *sse_tx =
        (tx_size == TX_8X8) ? sse8x8
                            : ((tx_size == TX_16X16) ? sse16x16 : sse32x32);
    const unsigned int *var_tx =
        (tx_size == TX_8X8) ? var8x8
                            : ((tx_size == TX_16X16) ? var16x16 : var32x32);

    // Calculate variance if tx_size > TX_8X8
    if (tx_size >= TX_16X16)
      calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
                         sum16x16);
    if (tx_size == TX_32X32)
      calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
                         sse32x32, sum32x32);

    // Skipping test
    x->skip_txfm[0] = SKIP_TXFM_NONE;
    for (k = 0; k < num; k++)
      // Check if all ac coefficients can be quantized to zero.
      if (!(var_tx[k] < ac_thr || var == 0)) {
        ac_test = 0;
        break;
      }

    for (k = 0; k < num; k++)
      // Check if dc coefficient can be quantized to zero.
      if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
        dc_test = 0;
        break;
      }

    if (ac_test) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;

      if (dc_test) x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else if (dc_test) {
      skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    int skip_uv[2] = { 0 };
    unsigned int var_uv[2];
    unsigned int sse_uv[2];

    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;

    // Transform skipping test in UV planes.
    for (i = 1; i <= 2; i++) {
      // Distinct names so the luma `p` / `pd` above are not shadowed.
      struct macroblock_plane *const uv_p = &x->plane[i];
      struct macroblockd_plane *const uv_pd = &xd->plane[i];
      const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], uv_pd);
      const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
      const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, uv_pd);
      const int uv_bw = b_width_log2_lookup[uv_bsize];
      const int uv_bh = b_height_log2_lookup[uv_bsize];
      const int sf = (uv_bw - b_width_log2_lookup[unit_size]) +
                     (uv_bh - b_height_log2_lookup[unit_size]);
      const uint32_t uv_dc_thr =
          uv_pd->dequant[0] * uv_pd->dequant[0] >> (6 - sf);
      const uint32_t uv_ac_thr =
          uv_pd->dequant[1] * uv_pd->dequant[1] >> (6 - sf);
      int j = i - 1;

      vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
      var_uv[j] = cpi->fn_ptr[uv_bsize].vf(uv_p->src.buf, uv_p->src.stride,
                                           uv_pd->dst.buf, uv_pd->dst.stride,
                                           &sse_uv[j]);

      if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
          (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
        skip_uv[j] = 1;
      else
        break;  // first failing plane settles it; the other is not examined
    }

    // If the transform in YUV planes are skippable, the mode search checks
    // fewer inter modes and doesn't check intra modes.
    if (skip_uv[0] & skip_uv[1]) {
      *early_term = 1;
    }

    return;
  }

  // DC part of the estimate: modelled unless the DC test passed on its own.
  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> (xd->bd - 5), &rate, &dist);
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

  // AC part of the estimate, added on top.
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> (xd->bd - 5), &rate, &dist);
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3,
                               &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

// Model-based rate/distortion estimate for the luma plane from a single
// whole-block variance measurement.
//
// Outputs:
//   *var_y, *sse_y   variance and SSE returned by the block-size variance
//                    function (source vs. the current destination buffer).
//   *out_rate_sum,
//   *out_dist_sum    modelled rate and distortion.
// Side effects: writes xd->mi[0]->tx_size and x->skip_txfm[0].
//
// The skip test uses per-transform-block averages: the whole-block sse and
// var are shifted right by log2 of the number of transform blocks.
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                              MACROBLOCKD *xd, int *out_rate_sum,
                              int64_t *out_dist_sum, unsigned int *var_y,
                              unsigned int *sse_y) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int64_t dc_thr = p->quant_thred[0] >> 6;
  const int64_t ac_thr = p->quant_thred[1] >> 6;
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                           pd->dst.buf, pd->dst.stride, &sse);
  int skip_dc = 0;

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      xd->mi[0]->tx_size =
          VPXMIN(max_txsize_lookup[bsize],
                 tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      xd->mi[0]->tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
      xd->mi[0]->tx_size = TX_8X8;
    else if (xd->mi[0]->tx_size > TX_16X16)
      xd->mi[0]->tx_size = TX_16X16;
  } else {
    xd->mi[0]->tx_size =
        VPXMIN(max_txsize_lookup[bsize],
               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    const BLOCK_SIZE unit_size = txsize_to_bsize[xd->mi[0]->tx_size];
    const unsigned int num_blk_log2 =
        (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
        (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
    const unsigned int sse_tx = sse >> num_blk_log2;
    const unsigned int var_tx = var >> num_blk_log2;

    x->skip_txfm[0] = SKIP_TXFM_NONE;
    // Check if all ac coefficients can be quantized to zero.
    if (var_tx < ac_thr || var == 0) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;
      // Check if dc coefficient can be quantized to zero.
      if (sse_tx - var_tx < dc_thr || sse == var)
        x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else {
      if (sse_tx - var_tx < dc_thr || sse == var) skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;
    return;
  }

  // DC part of the estimate: modelled unless the DC test passed on its own.
  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> (xd->bd - 5), &rate, &dist);
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

  // AC part of the estimate, added on top.
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> (xd->bd - 5), &rate, &dist);
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3,
                               &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

#if CONFIG_VP9_HIGHBITDEPTH
594
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
595 596
                      int *skippable, int64_t *sse, BLOCK_SIZE bsize,
                      TX_SIZE tx_size) {
597 598
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int var_y, sse_y;
599

600
  (void)tx_size;
601 602
  model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y,
                    &sse_y);
603 604 605 606 607
  *sse = INT_MAX;
  *skippable = 0;
  return;
}
#else
608
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
609 610
                      int *skippable, int64_t *sse, BLOCK_SIZE bsize,
                      TX_SIZE tx_size) {
611
  MACROBLOCKD *xd = &x->e_mbd;
612 613
  const struct macroblockd_plane *pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
614 615 616 617 618
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  const int step = 1 << (tx_size << 1);
  const int block_step = (1 << tx_size);
  int block = 0, r, c;
clang-format's avatar
clang-format committed
619 620 621 622
  const int max_blocks_wide =
      num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> 5);
  const int max_blocks_high =
      num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> 5);
623
  int eob_cost = 0;
624 625
  const int bw = 4 * num_4x4_w;
  const int bh = 4 * num_4x4_h;
626 627

  (void)cpi;
628 629 630 631 632 633

  // The max tx_size passed in is TX_16X16.
  assert(tx_size != TX_32X32);

  vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
634 635 636 637 638 639 640 641 642 643 644
  *skippable = 1;
  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
645
        const int diff_stride = bw;
646
        const int16_t *src_diff;
647
        src_diff = &p->src_diff[(r * diff_stride + c) << 2];
648 649 650

        switch (tx_size) {
          case TX_16X16:
James Zern's avatar
James Zern committed
651
            vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
652 653
            vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
clang-format's avatar
clang-format committed
654 655
                            pd->dequant, eob, scan_order->scan,
                            scan_order->iscan);
656 657
            break;
          case TX_8X8:
James Zern's avatar
James Zern committed
658
            vpx_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
659 660
            vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
clang-format's avatar
clang-format committed
661 662
                            pd->dequant, eob, scan_order->scan,
                            scan_order->iscan);
663 664 665 666 667
            break;
          case TX_4X4:
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
            vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
clang-format's avatar
clang-format committed
668 669
                            pd->dequant, eob, scan_order->scan,
                            scan_order->iscan);
670
            break;
clang-format's avatar
clang-format committed
671
          default: assert(0); break;
672
        }
673
        *skippable &= (*eob == 0);
674
        eob_cost += 1;
675 676 677 678 679
      }
      block += step;
    }
  }

680
  this_rdc->rate = 0;
681 682 683
  if (*sse < INT64_MAX) {
    *sse = (*sse << 6) >> 2;
    if (*skippable) {
684
      this_rdc->dist = *sse;
685 686
      return;
    }
687
  }
688

689
  block = 0;
690
  this_rdc->dist = 0;
691 692 693 694 695 696 697
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
698 699

        if (*eob == 1)
700
          this_rdc->rate += (int)abs(qcoeff[0]);
701
        else if (*eob > 1)
702
          this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4);
703

clang-format's avatar
clang-format committed
704
        this_rdc->dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2;
705 706 707 708 709
      }
      block += step;
    }
  }

710
  // If skippable is set, rate gets clobbered later.
711 712
  this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT);
  this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT);
713
}
714
#endif
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
717
                               MACROBLOCK *x, MACROBLOCKD *xd,
clang-format's avatar
clang-format committed
718 719 720
                               RD_COST *this_rdc, unsigned int *var_y,
                               unsigned int *sse_y, int start_plane,
                               int stop_plane) {
721 722 723 724 725 726 727
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  int i;
728 729 730 731 732 733 734
#if CONFIG_VP9_HIGHBITDEPTH
  uint64_t tot_var = *var_y;
  uint64_t tot_sse = *sse_y;
#else
  uint32_t tot_var = *var_y;
  uint32_t tot_sse = *sse_y;
#endif
735

736 737
  this_rdc->rate = 0;
  this_rdc->dist = 0;
738

739
  for (i = start_plane; i <= stop_plane; ++i) {
740 741 742 743
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const uint32_t dc_quant = pd->dequant[0];
    const uint32_t ac_quant = pd->dequant[1];
744
    const BLOCK_SIZE bs = plane_bsize;
745
    unsigned int var;
clang-format's avatar
clang-format committed
746
    if (!x->color_sensitivity[i - 1]) continue;
747

clang-format's avatar
clang-format committed
748 749
    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
                             pd->dst.stride, &sse);
750 751 752
    assert(sse >= var);
    tot_var += var;
    tot_sse += sse;
753

clang-format's avatar
clang-format committed
754
#if CONFIG_VP9_HIGHBITDEPTH
755 756
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                 dc_quant >> (xd->bd - 5), &rate, &dist);
clang-format's avatar
clang-format committed
757
#else
758 759
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                 dc_quant >> 3, &rate, &dist);
clang-format's avatar
clang-format committed
760
#endif  // CONFIG_VP9_HIGHBITDEPTH
761

762 763
    this_rdc->rate += rate >> 1;
    this_rdc->dist += dist << 3;
764

clang-format's avatar
clang-format committed
765
#if CONFIG_VP9_HIGHBITDEPTH
766 767
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
clang-format's avatar
clang-format committed
768 769 770 771
#else
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs], ac_quant >> 3,
                                 &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
772

773 774
    this_rdc->rate += rate;
    this_rdc->dist += dist << 4;
775
  }
776 777

#if CONFIG_VP9_HIGHBITDEPTH
clang-format's avatar
clang-format committed
778 779
  *var_y = tot_var > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_var;
  *sse_y = tot_sse > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_sse;
780
#else
clang-format's avatar
clang-format committed
781 782
  *var_y = tot_var;
  *sse_y = tot_sse;
783
#endif
784 785
}

786 787 788 789 790 791 792 793 794 795 796 797 798
// Claims the first entry of p[0..len-1] that is not in use: marks it as in
// use and returns its index. Returns -1 when no entry is free.
static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int idx = 0;

  while (idx < len) {
    if (p[idx].in_use == 0) {
      p[idx].in_use = 1;
      return idx;
    }
    ++idx;
  }

  return -1;
}

// Marks a prediction buffer as no longer in use. A NULL pointer is accepted
// and ignored.
static void free_pred_buffer(PRED_BUFFER *p) {
  if (p == NULL) return;

  p->in_use = 0;
}

clang-format's avatar
clang-format committed
802 803
static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
804
                                 MV_REFERENCE_FRAME ref_frame,
clang-format's avatar
clang-format committed
805 806
                                 PREDICTION_MODE this_mode, unsigned int var_y,
                                 unsigned int sse_y,
807 808 809
                                 struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                 int *rate, int64_t *dist) {
  MACROBLOCKD *xd = &x->e_mbd;
Scott LaVarnway's avatar
Scott LaVarnway committed
810
  MODE_INFO *const mi = xd->mi[0];
811 812 813 814 815 816
  const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  unsigned int var = var_y, sse = sse_y;
  // Skipping threshold for ac.
  unsigned int thresh_ac;
  // Skipping threshold for dc.
  unsigned int thresh_dc;
817
  int motion_low = 1;
clang-format's avatar
clang-format committed
818 819
  if (mi->mv[0].as_mv.row > 64 || mi->mv[0].as_mv.row < -64 ||
      mi->mv[0].as_mv.col > 64 || mi->mv[0].as_mv.col < -64)
820
    motion_low = 0;
821
  if (x->encode_breakout > 0 && motion_low == 1) {
822 823 824 825 826 827
    // Set a maximum for threshold to avoid big PSNR loss in low bit rate
    // case. Use extreme low threshold for static frames to limit
    // skipping.
    const unsigned int max_thresh = 36000;
    // The encode_breakout input
    const unsigned int min_thresh =
828
        VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh);
829
#if CONFIG_VP9_HIGHBITDEPTH
830
    const int shift = (xd->bd << 1) - 16;
831
#endif
832 833

    // Calculate threshold according to dequant value.
834
    thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
835 836 837 838 839
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
840 841 842 843
    thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

    // Adjust ac threshold according to partition size.
    thresh_ac >>=
844
        8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
845 846

    thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
847 848 849 850 851
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
852 853 854 855 856 857 858 859 860
  } else {
    thresh_ac = 0;
    thresh_dc = 0;
  }