/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"

#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"

static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                       const TileInfo *const tile,
                       MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                       int_mv *mv_ref_list,
                       int mi_row, int mi_col) {
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
  vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);

  // The nearest 2 blocks are treated differently:
  // if the size < 8x8 we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
                                                   xd->mi_stride];
      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
      // Keep counts for entropy encoding.
      context_counter += mode_2_counter[candidate->mode];
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1));
    }
  }

  const_motion = 1;
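  // If control reaches this point, the two nearest neighbours supplied at
  // most one distinct candidate mv (ADD_MV_REF_LIST is assumed to jump to
  // Done once a second, different candidate is found), so the caller can
  // treat the local motion as effectively constant.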

  // Check the rest of the neighbors in much the same way as before, except
  // that we don't need to keep track of sub blocks or mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
                                                    xd->mi_stride]->mbmi;
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(candidate->mv[0]);
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame
  // go back through the neighbors and find motion vectors from
  // different reference frames.
  if (different_ref_found && !refmv_count) {
    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
      const POSITION *mv_ref = &mv_ref_search[i];
      if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
        const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
                                              * xd->mi_stride]->mbmi;

        // If the candidate is INTRA we don't want to consider its mv.
        IF_DIFF_REF_FRAME_ADD_MV(candidate);
      }
    }
  }

 Done:

  mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];

  // Clamp vectors
  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);

  return const_motion;
}

static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  const int step_param = cpi->sf.mv.fullpel_search_step_param;
  const int sadpb = x->sadperbit16;
  MV mvp_full;
  const int ref = mbmi->ref_frame[0];
  const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
  int dis;
  int rate_mode;
  const int tmp_col_min = x->mv_col_min;
  const int tmp_col_max = x->mv_col_max;
  const int tmp_row_min = x->mv_row_min;
  const int tmp_row_max = x->mv_row_max;
  int rv = 0;
  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }
  vp9_set_mv_search_range(x, &ref_mv);
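  // Quick reject (below): on shown frames, skip the motion search for this
  // reference if its predicted-mv SAD is already roughly 8x worse than that
  // of LAST_FRAME.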

  if (cpi->common.show_frame &&
      (x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[LAST_FRAME]) {
    tmp_mv->as_int = INVALID_MV;
    if (scaled_ref_frame) {
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[0] = backup_yv12[i];
    }
    return rv;
  }
  assert(x->mv_best_ref_index[ref] <= 2);
  if (x->mv_best_ref_index[ref] < 2)
    mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  else
    mvp_full = x->pred_mv[ref];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, &ref_mv,
                        &tmp_mv->as_mv, INT_MAX, 0);

  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  // Calculate the bit cost of the motion vector.
  mvp_full.row = tmp_mv->as_mv.row * 8;
  mvp_full.col = tmp_mv->as_mv.col * 8;

  *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
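  // Sub-pel refinement (below) is only worthwhile if the NEWMV rate alone
  // (mv cost plus mode cost) does not already push the rd cost past
  // best_rd_sofar.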

  rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref]]
                                  [INTER_OFFSET(NEWMV)];
  rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) >
         best_rd_sofar);

  if (rv) {
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cpi->common.allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
    x->pred_mv[ref] = tmp_mv->as_mv;
  }

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
  return rv;
}


static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum,
                              unsigned int *var_y, unsigned int *sse_y) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
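  // Concretely, dc_quant >> 3 and ac_quant >> 3 are the effective quantizer
  // steps passed to the modeling function below, and the q * q >> 6 skip
  // thresholds are the squares of those effective (q / 8) steps.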
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                           pd->dst.buf, pd->dst.stride, &sse);
  *var_y = var;
  *sse_y = sse;

  if (sse < dc_quant * dc_quant >> 6)
    x->skip_txfm = 1;
  else if (var < ac_quant * ac_quant >> 6)
    x->skip_txfm = 2;
  else
    x->skip_txfm = 0;
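  // Transform size (below): under TX_MODE_SELECT, the largest transform
  // allowed is used only when sse dominates var (sse > 4 * var), i.e. most of
  // the energy is in the mean/DC term; otherwise 8x8 is used.  For fixed tx
  // modes the largest allowed size is always used.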

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      xd->mi[0]->mbmi.tx_size = MIN(max_txsize_lookup[bsize],
                          tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      xd->mi[0]->mbmi.tx_size = TX_8X8;
  } else {
    xd->mi[0]->mbmi.tx_size = MIN(max_txsize_lookup[bsize],
                         tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
                               dc_quant >> 3, &rate, &dist);
  *out_rate_sum = rate >> 1;
  *out_dist_sum = dist << 3;

  vp9_model_rd_from_var_lapndz(var, 1 << num_pels_log2_lookup[bsize],
                               ac_quant >> 3, &rate, &dist);
  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}
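// The two helpers below manage the small pool of temporary prediction
// buffers (PRED_BUFFER) that vp9_pick_inter_mode() uses when
// cpi->sf.reuse_inter_pred_sby is set.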

static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int i;

  for (i = 0; i < len; i++) {
    if (!p[i].in_use) {
      p[i].in_use = 1;
      return i;
    }
  }
  return -1;
}

static void free_pred_buffer(PRED_BUFFER *p) {
  p->in_use = 0;
}

static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 MV_REFERENCE_FRAME ref_frame,
                                 PREDICTION_MODE this_mode,
                                 unsigned int var_y, unsigned int sse_y,
                                 struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                 int *rate, int64_t *dist) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  unsigned int var = var_y, sse = sse_y;
  // Skipping threshold for ac.
  unsigned int thresh_ac;
  // Skipping threshold for dc.
  unsigned int thresh_dc;
  if (x->encode_breakout > 0) {
    // Set a maximum for the threshold to avoid a big PSNR loss in the low
    // bit rate case. Use an extremely low threshold for static frames to
    // limit skipping.
    const unsigned int max_thresh = 36000;
    // The encode_breakout input
    const unsigned int min_thresh =
        MIN(((unsigned int)x->encode_breakout << 4), max_thresh);

    // Calculate threshold according to dequant value.
    thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
    thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

    // Adjust ac threshold according to partition size.
    thresh_ac >>=
        8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

    thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
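    // i.e. the square of the effective dc quantizer step (dequant[0] / 8).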
  } else {
    thresh_ac = 0;
    thresh_dc = 0;
  }

  // Y skipping condition checking for ac and dc.
  if (var <= thresh_ac && (sse - var) <= thresh_dc) {
    unsigned int sse_u, sse_v;
    unsigned int var_u, var_v;

    // Skip UV prediction unless breakout is zero (lossless) to save
    // computation with low impact on the result
    if (x->encode_breakout == 0) {
      xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
      xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
      vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
    }

    var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
                                    x->plane[1].src.stride,
                                    xd->plane[1].dst.buf,
                                    xd->plane[1].dst.stride, &sse_u);

    // U skipping condition checking
    if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
      var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
                                      x->plane[2].src.stride,
                                      xd->plane[2].dst.buf,
                                      xd->plane[2].dst.stride, &sse_v);

      // V skipping condition checking
      if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
        x->skip = 1;

        // The cost of skip bit needs to be added.
        *rate = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
                                     [INTER_OFFSET(this_mode)];

        // More on this part of rate
        // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

        // Scaling factor for SSE from spatial domain to frequency
        // domain is 16. Adjust distortion accordingly.
        // TODO(yunqingwang): In this function, only y-plane dist is
        // calculated.
        *dist = (sse << 4);  // + ((sse_u + sse_v) << 4);

        // *disable_skip = 1;
      }
    }
  }
}

// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// This needs various further optimizations; to be continued.
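// Picks the prediction mode for the block at (mi_row, mi_col) of size bsize
// without a full RD search: the inter modes NEARESTMV/NEARMV/ZEROMV/NEWMV are
// evaluated for LAST_FRAME, and a DC_PRED intra check is tried afterwards if
// the best inter rd cost stays above a threshold. The chosen mode is written
// to xd->mi[0]->mbmi, the model-based rate/distortion to *returnrate and
// *returndistortion, and the function itself always returns INT64_MAX.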
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                            const TileInfo *const tile,
                            int mi_row, int mi_col,
                            int *returnrate,
                            int64_t *returndistortion,
                            BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  PREDICTION_MODE this_mode, best_mode = ZEROMV;
  MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
  TX_SIZE best_tx_size = MIN(max_txsize_lookup[bsize],
                             tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  INTERP_FILTER best_pred_filter = EIGHTTAP;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int64_t best_rd = INT64_MAX;
  int64_t this_rd = INT64_MAX;
  int skip_txfm = 0;

  int rate = INT_MAX;
  int64_t dist = INT64_MAX;
  // var_y and sse_y are saved to be used in the skip check below.
  unsigned int var_y = UINT_MAX;
  unsigned int sse_y = UINT_MAX;

  VP9_COMMON *cm = &cpi->common;
  int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);

  const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                           intra_cost_penalty, 0);
  const int64_t intra_mode_cost = 50;
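  // intra_mode_cost is a small fixed bias charged against the DC_PRED check
  // near the end of this function, so intra only replaces the best inter
  // mode when it is clearly cheaper.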

  unsigned char segment_id = mbmi->segment_id;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize];
  // Mode index conversion from THR_MODES to PREDICTION_MODE for a ref frame.
  int mode_idx[MB_MODE_COUNT] = {0};
  INTERP_FILTER filter_ref = cm->interp_filter;
  int bsl = mi_width_log2_lookup[bsize];
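  // When the frame-level filter is SWITCHABLE, the per-block filter search is
  // only enabled on alternating blocks in a chessboard pattern, offset by
  // get_chessboard_index().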
  const int pred_filter_search = cm->interp_filter == SWITCHABLE ?
      (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm)) % 2 : 0;
  int const_motion[MAX_REF_FRAMES] = { 0 };

  // For speed 6, the result of the interp filter search is reused later in
  // the actual encoding process.
  int bh = num_4x4_blocks_high_lookup[bsize] << 2;
  int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
  int pixels_in_block = bh * bw;
  // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
  PRED_BUFFER tmp[4];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64);
  struct buf_2d orig_dst = pd->dst;
  PRED_BUFFER *best_pred = NULL;
  PRED_BUFFER *this_mode_pred = NULL;
  int i;

#if CONFIG_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0) {
    vp9_denoiser_reset_frame_stats(&cpi->denoiser);
  }
#endif

  if (cpi->sf.reuse_inter_pred_sby) {
    for (i = 0; i < 3; i++) {
      tmp[i].data = &pred_buf[pixels_in_block * i];
      tmp[i].stride = bw;
      tmp[i].in_use = 0;
    }

    tmp[3].data = pd->dst.buf;
    tmp[3].stride = pd->dst.stride;
    tmp[3].in_use = 0;
  }

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  x->skip = 0;

  // initialize mode decisions
  *returnrate = INT_MAX;
  *returndistortion = INT64_MAX;
  vpx_memset(mbmi, 0, sizeof(MB_MODE_INFO));
  mbmi->sb_type = bsize;
  mbmi->ref_frame[0] = NONE;
  mbmi->ref_frame[1] = NONE;
  mbmi->tx_size = MIN(max_txsize_lookup[bsize],
                      tx_mode_to_biggest_tx_size[cm->tx_mode]);
  mbmi->interp_filter = cm->interp_filter == SWITCHABLE ?
                        EIGHTTAP : cm->interp_filter;
  mbmi->skip = 0;
  mbmi->segment_id = segment_id;

  for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
      int_mv *const candidates = mbmi->ref_mvs[ref_frame];
      const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
      vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
                           sf, sf);

      if (cm->coding_use_prev_mi)
        vp9_find_mv_refs(cm, xd, tile, xd->mi[0], ref_frame,
                         candidates, mi_row, mi_col);
      else
        const_motion[ref_frame] = mv_refs_rt(cm, xd, tile, xd->mi[0],
                                             ref_frame, candidates,
                                             mi_row, mi_col);

      vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                            &frame_mv[NEARESTMV][ref_frame],
                            &frame_mv[NEARMV][ref_frame]);

      if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
        vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
                    ref_frame, bsize);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  if (xd->up_available)
    filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
  else if (xd->left_available)
    filter_ref = xd->mi[-1]->mbmi.interp_filter;

  for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
      continue;

    // Select prediction reference frames.
    xd->plane[0].pre[0] = yv12_mb[ref_frame][0];

    clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
    clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);

    mbmi->ref_frame[0] = ref_frame;

    // Set conversion index for LAST_FRAME.
    if (ref_frame == LAST_FRAME) {
      mode_idx[NEARESTMV] = THR_NEARESTMV;   // LAST_FRAME, NEARESTMV
      mode_idx[NEARMV] = THR_NEARMV;         // LAST_FRAME, NEARMV
      mode_idx[ZEROMV] = THR_ZEROMV;         // LAST_FRAME, ZEROMV
      mode_idx[NEWMV] = THR_NEWMV;           // LAST_FRAME, NEWMV
    }

    for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
      int rate_mv = 0;

      if (const_motion[ref_frame] &&
          (this_mode == NEARMV || this_mode == ZEROMV))
        continue;

      if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
        continue;

      if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]],
                              rd_thresh_freq_fact[this_mode]))
        continue;

      if (this_mode == NEWMV) {
        if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
          continue;
        if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
                                    &frame_mv[NEWMV][ref_frame],
                                    &rate_mv, best_rd))
          continue;
      }

      if (this_mode != NEARESTMV &&
          frame_mv[this_mode][ref_frame].as_int ==
              frame_mv[NEARESTMV][ref_frame].as_int)
          continue;

      mbmi->mode = this_mode;
      mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;

      // Search for the best prediction filter type, when the resulting motion
      // vector has sub-pixel precision for the luma component, i.e., at least
      // one of the three fractional bits is non-zero.
      if (cpi->sf.reuse_inter_pred_sby) {
        if (this_mode == NEARESTMV) {
          this_mode_pred = &tmp[3];
        } else {
          this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
          pd->dst.buf = this_mode_pred->data;
          pd->dst.stride = bw;
        }
      }

      if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
          pred_filter_search &&
          ((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
           (mbmi->mv[0].as_mv.col & 0x07) != 0)) {
        int pf_rate[3];
        int64_t pf_dist[3];
        unsigned int pf_var[3];
        unsigned int pf_sse[3];
        TX_SIZE pf_tx_size[3];
        int64_t best_cost = INT64_MAX;
        INTERP_FILTER best_filter = SWITCHABLE, filter;
        PRED_BUFFER *current_pred = this_mode_pred;

        for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
          int64_t cost;
          mbmi->interp_filter = filter;
          vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
          model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter],
                            &pf_dist[filter], &pf_var[filter], &pf_sse[filter]);
          cost = RDCOST(x->rdmult, x->rddiv,
                        vp9_get_switchable_rate(cpi) + pf_rate[filter],
                        pf_dist[filter]);
          pf_tx_size[filter] = mbmi->tx_size;
          if (cost < best_cost) {
            best_filter = filter;
            best_cost = cost;
            skip_txfm = x->skip_txfm;

            if (cpi->sf.reuse_inter_pred_sby) {
              if (this_mode_pred != current_pred) {
                free_pred_buffer(this_mode_pred);
                this_mode_pred = current_pred;
              }

              if (filter < EIGHTTAP_SHARP) {
                current_pred = &tmp[get_pred_buffer(tmp, 3)];
                pd->dst.buf = current_pred->data;
                pd->dst.stride = bw;
              }
            }
          }
        }

        if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred)
          free_pred_buffer(current_pred);

        mbmi->interp_filter = best_filter;
        mbmi->tx_size = pf_tx_size[mbmi->interp_filter];
        rate = pf_rate[mbmi->interp_filter];
        dist = pf_dist[mbmi->interp_filter];
        var_y = pf_var[mbmi->interp_filter];
        sse_y = pf_sse[mbmi->interp_filter];
        x->skip_txfm = skip_txfm;
      } else {
        mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
        model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
      }

      rate += rate_mv;
      rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
                                [INTER_OFFSET(this_mode)];
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);

      // Skip check: test whether this block can be reconstructed by
      // prediction only.
      if (cpi->allow_encode_breakout) {
        encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame,
                             this_mode, var_y, sse_y, yv12_mb, &rate, &dist);
        if (x->skip) {
          rate += rate_mv;
          this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
        }
      }

#if CONFIG_DENOISING
      if (cpi->oxcf.noise_sensitivity > 0) {
        vp9_denoiser_update_frame_stats(&cpi->denoiser, mbmi, sse_y, this_mode);
      }
#endif

      if (this_rd < best_rd || x->skip) {
        best_rd = this_rd;
        *returnrate = rate;
        *returndistortion = dist;
        best_mode = this_mode;
        best_pred_filter = mbmi->interp_filter;
        best_tx_size = mbmi->tx_size;
        best_ref_frame = ref_frame;
        skip_txfm = x->skip_txfm;

        if (cpi->sf.reuse_inter_pred_sby) {
          if (best_pred != NULL)
            free_pred_buffer(best_pred);

          best_pred = this_mode_pred;
        }
      } else {
        if (cpi->sf.reuse_inter_pred_sby)
          free_pred_buffer(this_mode_pred);
      }

      if (x->skip)
        break;
    }
  }

  // If best prediction is not in dst buf, then copy the prediction block from
  // temp buf to dst buf.
  if (cpi->sf.reuse_inter_pred_sby && best_pred != NULL &&
      best_pred->data != orig_dst.buf) {
    uint8_t *copy_from, *copy_to;

    pd->dst = orig_dst;
    copy_to = pd->dst.buf;

    copy_from = best_pred->data;

    vp9_convolve_copy(copy_from, bw, copy_to, pd->dst.stride, NULL, 0, NULL, 0,
                      bw, bh);
  }

  mbmi->mode          = best_mode;
  mbmi->interp_filter = best_pred_filter;
  mbmi->tx_size       = best_tx_size;
  mbmi->ref_frame[0]  = best_ref_frame;
  mbmi->mv[0].as_int  = frame_mv[best_mode][best_ref_frame].as_int;
  xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
  x->skip_txfm = skip_txfm;

  // Perform an intra prediction search, if the best inter rd cost so far is
  // above a certain threshold.
  if (!x->skip && best_rd > inter_mode_thresh &&
      bsize <= cpi->sf.max_intra_bsize) {
    int i, j;
    const int width  = num_4x4_blocks_wide_lookup[bsize];
    const int height = num_4x4_blocks_high_lookup[bsize];
    const BLOCK_SIZE bsize_tx = txsize_to_bsize[mbmi->tx_size];

    int rate2 = 0;
    int64_t dist2 = 0;
    const int dst_stride = cpi->sf.reuse_inter_pred_sby ? bw : pd->dst.stride;
    const int src_stride = p->src.stride;
    int block_idx = 0;

    TX_SIZE tmp_tx_size = MIN(max_txsize_lookup[bsize],
                              tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    const int step = 1 << tmp_tx_size;
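    // Note: only DC_PRED is evaluated in the loop below.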

    for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
      if (cpi->sf.reuse_inter_pred_sby) {
        pd->dst.buf = tmp[0].data;
        pd->dst.stride = bw;
      }

      for (j = 0; j < height; j += step) {
        for (i = 0; i < width; i += step) {
          vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize),
                                  tmp_tx_size, this_mode,
                                  &p->src.buf[4 * (j * src_stride + i)],
                                  src_stride,
                                  &pd->dst.buf[4 * (j * dst_stride + i)],
                                  dst_stride, i, j, 0);
          model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y);
          rate2 += rate;
          dist2 += dist;
          ++block_idx;
        }
      }

      rate = rate2;
      dist = dist2;

      rate += cpi->mbmode_cost[this_mode];
      rate += intra_cost_penalty;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);

      if (cpi->sf.reuse_inter_pred_sby)
        pd->dst = orig_dst;

      if (this_rd + intra_mode_cost < best_rd) {
        best_rd = this_rd;
        *returnrate = rate;
        *returndistortion = dist;
        mbmi->mode = this_mode;
        mbmi->tx_size = tmp_tx_size;
        mbmi->ref_frame[0] = INTRA_FRAME;
        mbmi->uv_mode = this_mode;
        mbmi->mv[0].as_int = INVALID_MV;
      } else {
        x->skip_txfm = skip_txfm;
      }
    }
  }

#if CONFIG_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0) {
    vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize);
  }
#endif

  return INT64_MAX;
}