/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"

typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

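// A trimmed-down, real-time variant of vp9_find_mv_refs(): it scans only
// the spatial neighbors, jumps to Done as soon as the candidate list is
// full, and returns a constant-motion hint that the caller uses to skip
// the NEARMV candidate.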
static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
                      const MACROBLOCKD *xd,
                      const TileInfo *const tile,
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                      int_mv *mv_ref_list,
                      int mi_row, int mi_col) {
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);

  // The nearest 2 blocks are treated differently:
  // if the size is < 8x8, we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
                                                   xd->mi_stride];
      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
      // Keep counts for entropy encoding.
      context_counter += mode_2_counter[candidate->mode];
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
                        refmv_count, mv_ref_list, Done);
    }
  }

  const_motion = 1;

  // Check the rest of the neighbors in much the same way
  // as before except we don't need to keep track of sub blocks or
  // mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
                                                    xd->mi_stride]->mbmi;
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame
  // go back through the neighbors and find motion vectors from
  // different reference frames.
  if (different_ref_found && !refmv_count) {
    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
      const POSITION *mv_ref = &mv_ref_search[i];
      if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
        const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
                                              * xd->mi_stride]->mbmi;

        // If the candidate is INTRA we don't want to consider its mv.
        IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
                                 refmv_count, mv_ref_list, Done);
      }
    }
  }

 Done:

  x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter];

  // Clamp vectors
  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);

  return const_motion;
}

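// Full-pel motion search followed by sub-pel refinement for NEWMV
// candidates.  The sub-pel step only runs when the rate cost of the motion
// vector and mode alone does not already exceed best_rd_sofar, so hopeless
// candidates are rejected after the cheap full-pel stage.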
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  const int step_param = cpi->sf.mv.fullpel_search_step_param;
  const int sadpb = x->sadperbit16;
  MV mvp_full;
  const int ref = mbmi->ref_frame[0];
  const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  int dis;
  int rate_mode;
  const int tmp_col_min = x->mv_col_min;
  const int tmp_col_max = x->mv_col_max;
  const int tmp_row_min = x->mv_row_min;
  const int tmp_row_max = x->mv_row_max;
  int rv = 0;
  int cost_list[5];
  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }
  vp9_set_mv_search_range(x, &ref_mv);

  assert(x->mv_best_ref_index[ref] <= 2);
  if (x->mv_best_ref_index[ref] < 2)
    mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  else
    mvp_full = x->pred_mv[ref];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                        cond_cost_list(cpi, cost_list),
                        &ref_mv, &tmp_mv->as_mv, INT_MAX, 0);

  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  // Calculate the bit cost of the motion vector.
  mvp_full.row = tmp_mv->as_mv.row * 8;
  mvp_full.col = tmp_mv->as_mv.col * 8;

  *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  rate_mode = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]]
                                  [INTER_OFFSET(NEWMV)];
  rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) >
         best_rd_sofar);

  if (rv) {
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cpi->common.allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 cond_cost_list(cpi, cost_list),
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
    *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                               x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
  return rv;
}

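// Accumulate SSE and sum over the whole partition while recording the
// per-8x8 statistics needed by the transform-skip tests below.  Each 8x8
// variance follows var = E[x^2] - E[x]^2, i.e. sse8x8 - sum8x8^2 / 64
// (the >> 6).  For example, 64 residuals that all equal 2 give sum = 128,
// sse = 256, and var = 256 - 16384 / 64 = 0, as expected for a flat block.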
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           int w, int h, unsigned int *sse, int *sum,
                           int block_size, unsigned int *sse8x8,
                           int *sum8x8, unsigned int *var8x8) {
  int i, j, k = 0;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += block_size) {
    for (j = 0; j < w; j += block_size) {
      vpx_get8x8var(src + src_stride * i + j, src_stride,
                    ref + ref_stride * i + j, ref_stride,
                    &sse8x8[k], &sum8x8[k]);
      *sse += sse8x8[k];
      *sum += sum8x8[k];
      var8x8[k] = sse8x8[k] - (((unsigned int)sum8x8[k] * sum8x8[k]) >> 6);
      k++;
    }
  }
}

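// Fold 2x2 groups of sub-block SSE/sum statistics up to the next transform
// size so the skip tests can run at TX_16X16/TX_32X32 granularity without
// revisiting the pixels.  The variance shift scales with the unit area:
// aggregating 8x8 units uses >> (1 + 1 + 6) = >> 8, i.e. a division by the
// 256 pixels of a 16x16 block.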
static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
                               unsigned int *sse_i, int *sum_i,
                               unsigned int *var_o, unsigned int *sse_o,
                               int *sum_o) {
  const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  int i, j, k = 0;

  for (i = 0; i < nh; i += 2) {
    for (j = 0; j < nw; j += 2) {
      sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
          sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
      sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
          sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
      var_o[k] = sse_o[k] - (((unsigned int)sum_o[k] * sum_o[k]) >>
          (b_width_log2_lookup[unit_size] +
              b_height_log2_lookup[unit_size] + 6));
      k++;
    }
  }
}

static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                    MACROBLOCK *x, MACROBLOCKD *xd,
                                    int *out_rate_sum, int64_t *out_dist_sum,
                                    unsigned int *var_y, unsigned int *sse_y,
                                    int mi_row, int mi_col, int *early_term) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  const int64_t dc_thr = dc_quant * dc_quant >> 6;
  const int64_t ac_thr = ac_quant * ac_quant >> 6;
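  // The dequant values are 8x the effective quantizer step (see the note
  // above), so step^2 = dequant^2 / 64, which is the >> 6 here.  For
  // example, dequant = 40 gives step = 5 and a threshold of
  // 1600 >> 6 = 25 = 5^2.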
  unsigned int var;
  int sum;
  int skip_dc = 0;

  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  const int num8x8 = 1 << (bw + bh - 2);
  unsigned int sse8x8[64] = {0};
  int sum8x8[64] = {0};
  unsigned int var8x8[64] = {0};
  TX_SIZE tx_size;
  int i, k;

  // Calculate variance for whole partition, and also save 8x8 blocks' variance
  // to be used in following transform skipping test.
  block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                 4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8);
  var = sse - (((int64_t)sum * sum) >> (bw + bh + 4));

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      tx_size = VPXMIN(max_txsize_lookup[bsize],
                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
      tx_size = TX_8X8;
    else if (tx_size > TX_16X16)
      tx_size = TX_16X16;
  } else {
    tx_size = VPXMIN(max_txsize_lookup[bsize],
                     tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  assert(tx_size >= TX_8X8);
  xd->mi[0]->mbmi.tx_size = tx_size;

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    unsigned int sse16x16[16] = {0};
    int sum16x16[16] = {0};
    unsigned int var16x16[16] = {0};
    const int num16x16 = num8x8 >> 2;

    unsigned int sse32x32[4] = {0};
    int sum32x32[4] = {0};
    unsigned int var32x32[4] = {0};
    const int num32x32 = num8x8 >> 4;

    int ac_test = 1;
    int dc_test = 1;
    const int num = (tx_size == TX_8X8) ? num8x8 :
        ((tx_size == TX_16X16) ? num16x16 : num32x32);
    const unsigned int *sse_tx = (tx_size == TX_8X8) ? sse8x8 :
        ((tx_size == TX_16X16) ? sse16x16 : sse32x32);
    const unsigned int *var_tx = (tx_size == TX_8X8) ? var8x8 :
        ((tx_size == TX_16X16) ? var16x16 : var32x32);

    // Calculate variance if tx_size > TX_8X8
    if (tx_size >= TX_16X16)
      calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
                         sum16x16);
    if (tx_size == TX_32X32)
      calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
                         sse32x32, sum32x32);

    // Skipping test
    x->skip_txfm[0] = SKIP_TXFM_NONE;
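    // AC test: every transform block's AC energy (its variance) must fall
    // below ac_thr.  DC test: the DC energy is sse - var, the squared-mean
    // term that the variance removes, and must fall below dc_thr.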
    for (k = 0; k < num; k++)
      // Check if all ac coefficients can be quantized to zero.
      if (!(var_tx[k] < ac_thr || var == 0)) {
        ac_test = 0;
        break;
      }

    for (k = 0; k < num; k++)
      // Check if dc coefficient can be quantized to zero.
      if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
        dc_test = 0;
        break;
      }

    if (ac_test) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;

      if (dc_test)
        x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else if (dc_test) {
      skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    int skip_uv[2] = {0};
    unsigned int var_uv[2];
    unsigned int sse_uv[2];

    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;

    // Transform skipping test in UV planes.
    for (i = 1; i <= 2; i++) {
      struct macroblock_plane *const p = &x->plane[i];
      struct macroblockd_plane *const pd = &xd->plane[i];
      const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0]->mbmi, pd);
      const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
      const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
      const int uv_bw = b_width_log2_lookup[uv_bsize];
      const int uv_bh = b_height_log2_lookup[uv_bsize];
      const int sf = (uv_bw - b_width_log2_lookup[unit_size]) +
          (uv_bh - b_height_log2_lookup[unit_size]);
      const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf);
      const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf);
      int j = i - 1;

      vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
      var_uv[j] = cpi->fn_ptr[uv_bsize].vf(p->src.buf, p->src.stride,
          pd->dst.buf, pd->dst.stride, &sse_uv[j]);

      if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
          (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
        skip_uv[j] = 1;
      else
        break;
    }

    // If the transforms in all YUV planes are skippable, the mode search checks
    // fewer inter modes and doesn't check intra modes.
    if (skip_uv[0] & skip_uv[1]) {
      *early_term = 1;
    }

    return;
  }

  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
  } else {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> 3, &rate, &dist);
  }
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum,
                              unsigned int *var_y, unsigned int *sse_y) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int64_t dc_thr = p->quant_thred[0] >> 6;
  const int64_t ac_thr = p->quant_thred[1] >> 6;
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                           pd->dst.buf, pd->dst.stride, &sse);
  int skip_dc = 0;

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      xd->mi[0]->mbmi.tx_size =
          VPXMIN(max_txsize_lookup[bsize],
                 tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      xd->mi[0]->mbmi.tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
      xd->mi[0]->mbmi.tx_size = TX_8X8;
    else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
      xd->mi[0]->mbmi.tx_size = TX_16X16;
  } else {
    xd->mi[0]->mbmi.tx_size =
        VPXMIN(max_txsize_lookup[bsize],
               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    const BLOCK_SIZE unit_size =
        txsize_to_bsize[xd->mi[0]->mbmi.tx_size];
    const unsigned int num_blk_log2 =
        (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
        (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
    const unsigned int sse_tx = sse >> num_blk_log2;
    const unsigned int var_tx = var >> num_blk_log2;
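    // Unlike model_rd_for_sb_y_large(), this path approximates every
    // transform block by the partition average: sse and var are simply
    // divided by the number of transform blocks instead of being measured
    // per block.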

    x->skip_txfm[0] = SKIP_TXFM_NONE;
    // Check if all ac coefficients can be quantized to zero.
    if (var_tx < ac_thr || var == 0) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;
      // Check if dc coefficient can be quantized to zero.
      if (sse_tx - var_tx < dc_thr || sse == var)
        x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else {
      if (sse_tx - var_tx < dc_thr || sse == var)
        skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;
    return;
  }

  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
  } else {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> 3, &rate, &dist);
  }
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

#if CONFIG_VP9_HIGHBITDEPTH
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                      int *skippable, int64_t *sse, int plane,
                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int var_y, sse_y;
  (void)plane;
  (void)tx_size;
  model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
  *sse = INT_MAX;
  *skippable = 0;
  return;
}
#else
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                      int *skippable, int64_t *sse, int plane,
                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const struct macroblock_plane *const p = &x->plane[plane];
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  const int step = 1 << (tx_size << 1);
  const int block_step = (1 << tx_size);
  int block = 0, r, c;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
      xd->mb_to_right_edge >> (5 + pd->subsampling_x));
  const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
      xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
  int eob_cost = 0;

  (void)cpi;
  vp9_subtract_plane(x, bsize, plane);
  *skippable = 1;
  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
        const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
        const int16_t *src_diff;
        src_diff = &p->src_diff[(r * diff_stride + c) << 2];

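        // For speed, the 8x8 and 16x16 cases use a Hadamard transform as a
        // cheap stand-in for the DCT; its coefficient energy is close
        // enough for the fast quantizer and the SATD-based rate estimate
        // below.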
        switch (tx_size) {
          case TX_32X32:
            vpx_fdct32x32_rd(src_diff, coeff, diff_stride);
            vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round_fp, p->quant_fp, p->quant_shift,
                                  qcoeff, dqcoeff, pd->dequant, eob,
                                  scan_order->scan, scan_order->iscan);
            break;
          case TX_16X16:
            vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_8X8:
            vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_4X4:
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
            vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          default:
            assert(0);
            break;
        }
        *skippable &= (*eob == 0);
        eob_cost += 1;
      }
      block += step;
    }
  }

  if (*skippable && *sse < INT64_MAX) {
    *rate = 0;
    *dist = (*sse << 6) >> shift;
    *sse = *dist;
    return;
  }

  block = 0;
  *rate = 0;
  *dist = 0;
  if (*sse < INT64_MAX)
    *sse = (*sse << 6) >> shift;
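  // Rate proxy: the sum of absolute quantized coefficients (a SATD over
  // qcoeff) approximates the coefficient bit cost, while distortion comes
  // from vp9_block_error_fp() on the coeff/dqcoeff pair, rescaled by the
  // transform-size shift.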
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];

        if (*eob == 1)
          *rate += (int)abs(qcoeff[0]);
        else if (*eob > 1)
          *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);

        *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
      }
      block += step;
    }
  }

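  // Heuristic scaling that puts the SATD proxy and the per-block EOB count
  // on a scale comparable to the entropy-coded rate used in RDCOST; the
  // exact factors are empirical.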
  if (*skippable == 0) {
    *rate <<= 10;
    *rate += (eob_cost << 8);
  }
}
#endif

static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               int *out_rate_sum, int64_t *out_dist_sum,
                               unsigned int *var_y, unsigned int *sse_y,
                               int start_plane, int stop_plane) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  int i;

  *out_rate_sum = 0;
  *out_dist_sum = 0;

  for (i = start_plane; i <= stop_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const uint32_t dc_quant = pd->dequant[0];
    const uint32_t ac_quant = pd->dequant[1];
    const BLOCK_SIZE bs = plane_bsize;
    unsigned int var;

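    // Chroma planes are only modeled when the color-sensitivity heuristic
    // has flagged them for this block; otherwise their rate/distortion
    // contribution is treated as negligible.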
    if (!x->color_sensitivity[i - 1])
      continue;

    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                             pd->dst.buf, pd->dst.stride, &sse);
    *var_y += var;
    *sse_y += sse;

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    *out_rate_sum += rate >> 1;
    *out_dist_sum += dist << 3;

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                   ac_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                   ac_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                 ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    *out_rate_sum += rate;
    *out_dist_sum += dist << 4;
  }
}

static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int i;

  for (i = 0; i < len; i++) {
    if (!p[i].in_use) {
      p[i].in_use = 1;
      return i;
    }
  }
  return -1;
}

static void free_pred_buffer(PRED_BUFFER *p) {
  if (p != NULL)
    p->in_use = 0;
}

static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 MV_REFERENCE_FRAME ref_frame,
                                 PREDICTION_MODE this_mode,
                                 unsigned int var_y, unsigned int sse_y,
                                 struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                 int *rate, int64_t *dist) {
  MACROBLOCKD *xd = &x->e_mbd;

  const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  unsigned int var = var_y, sse = sse_y;
  // Skipping threshold for ac.
  unsigned int thresh_ac;
  // Skipping threshold for dc.
  unsigned int thresh_dc;
  if (x->encode_breakout > 0) {
    // Set a maximum for the threshold to avoid a big PSNR loss in low
    // bit-rate cases.  Use an extremely low threshold for static frames to
    // limit skipping.
    const unsigned int max_thresh = 36000;
    // The encode_breakout input
    const unsigned int min_thresh =
        VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh);
#if CONFIG_VP9_HIGHBITDEPTH
    const int shift = (xd->bd << 1) - 16;
#endif

    // Calculate threshold according to dequant value.
    thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

    // Adjust ac threshold according to partition size.
    thresh_ac >>=
        8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
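    // The shift scales the threshold with block area: a 64x64 partition
    // (log2 width + height = 8) keeps the full threshold, while e.g. an
    // 8x8 partition shifts by 6, dividing it by 64.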

    thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else {
    thresh_ac = 0;
    thresh_dc = 0;
  }

  // Y skipping condition checking for ac and dc.
  if (var <= thresh_ac && (sse - var) <= thresh_dc) {
    unsigned int sse_u, sse_v;
    unsigned int var_u, var_v;

    // Skip UV prediction unless breakout is zero (lossless) to save
    // computation with low impact on the result
    if (x->encode_breakout == 0) {
      xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
      xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
      vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
    }

    var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
                                    x->plane[1].src.stride,
                                    xd->plane[1].dst.buf,
                                    xd->plane[1].dst.stride, &sse_u);

    // U skipping condition checking
    if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
      var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
                                      x->plane[2].src.stride,
                                      xd->plane[2].dst.buf,
                                      xd->plane[2].dst.stride, &sse_v);

      // V skipping condition checking
      if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
        x->skip = 1;

        // The cost of skip bit needs to be added.
        *rate = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
                                    [INTER_OFFSET(this_mode)];

        // More on this part of rate
        // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

        // Scaling factor for SSE from spatial domain to frequency
        // domain is 16. Adjust distortion accordingly.
        // TODO(yunqingwang): In this function, only y-plane dist is
        // calculated.
        *dist = (sse << 4);  // + ((sse_u + sse_v) << 4);

        // *disable_skip = 1;
      }
    }
  }
}

struct estimate_block_intra_args {
  VP9_COMP *cpi;
  MACROBLOCK *x;
  PREDICTION_MODE mode;
  int rate;
  int64_t dist;
};

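// Per-transform-block intra cost estimate, called back from
// vp9_foreach_transformed_block_in_plane().  Prediction runs from the
// in-place reconstruction (or from the source when skip_encode is set);
// luma is scored with the fast block_yrd() and chroma with the
// variance-based model.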
static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                 TX_SIZE tx_size, void *arg) {
  struct estimate_block_intra_args* const args = arg;
  VP9_COMP *const cpi = args->cpi;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
  uint8_t *const src_buf_base = p->src.buf;
  uint8_t *const dst_buf_base = pd->dst.buf;
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  int i, j;
  int rate;
  int64_t dist;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);

  p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
  pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
  // Use source buffer as an approximation for the fully reconstructed buffer.
  vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
                          tx_size, args->mode,
                          x->skip_encode ? p->src.buf : pd->dst.buf,
                          x->skip_encode ? src_stride : dst_stride,
                          pd->dst.buf, dst_stride,
                          i, j, plane);

  if (plane == 0) {
    int64_t this_sse = INT64_MAX;
    int is_skippable;
    // TODO(jingning): This needs further refactoring.
    block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
              bsize_tx, VPXMIN(tx_size, TX_16X16));
    x->skip_txfm[0] = is_skippable;
    // TODO(jingning): Skip is signalled per prediction block, not per tx block.
    rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);
  } else {
    unsigned int var, sse;
    model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse,
                       plane, plane);
  }

  p->src.buf = src_buf_base;
  pd->dst.buf = dst_buf_base;
  args->rate += rate;
  args->dist += dist;
}

static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
  {THR_DC, THR_V_PRED, THR_H_PRED, THR_TM},
  {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
  {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
};

static const PREDICTION_MODE intra_mode_list[] = {
  DC_PRED, V_PRED, H_PRED, TM_PRED
};

static int mode_offset(const PREDICTION_MODE mode) {
  if (mode >= NEARESTMV) {
    return INTER_OFFSET(mode);
  } else {
    switch (mode) {
      case DC_PRED:
        return 0;
      case V_PRED:
        return 1;
      case H_PRED:
        return 2;
      case TM_PRED:
        return 3;
      default:
        return -1;
    }
  }
}

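// Adapt the per-mode pruning state: the winning mode's frequency factor
// decays by 1/16th of itself (making its RD threshold easier to pass next
// time), while every other mode's factor grows by RD_THRESH_INC up to the
// adaptive_rd_thresh cap, progressively pruning modes that rarely win.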
static INLINE void update_thresh_freq_fact(VP9_COMP *cpi,
                                           TileDataEnc *tile_data,
                                           BLOCK_SIZE bsize,
                                           MV_REFERENCE_FRAME ref_frame,
                                           THR_MODES best_mode_idx,
                                           PREDICTION_MODE mode) {
  THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)];
  int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx];
  if (thr_mode_idx == best_mode_idx)
    *freq_fact -= (*freq_fact >> 4);
  else
    *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC,
                        cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
}

void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  RD_COST this_rdc, best_rdc;
  PREDICTION_MODE this_mode;
  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
  const TX_SIZE intra_tx_size =
      VPXMIN(max_txsize_lookup[bsize],
             tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  MODE_INFO *const mic = xd->mi[0];
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  (void) ctx;
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(&this_rdc);

  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->mv[0].as_int = INVALID_MV;
  mbmi->uv_mode = DC_PRED;
  memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  // Change the limit of this loop to add other intra prediction
  // mode tests.
  for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
    args.mode = this_mode;
    args.rate = 0;
    args.dist = 0;
    mbmi->tx_size = intra_tx_size;
    vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                           estimate_block_intra, &args);
    this_rdc.rate = args.rate;
    this_rdc.dist = args.dist;
    this_rdc.rate += bmode_costs[this_mode];
    this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                             this_rdc.rate, this_rdc.dist);

    if (this_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = this_rdc;
      mbmi->mode = this_mode;
    }
  }

  *rd_cost = best_rdc;
}

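// Precompute the bit cost of signaling each reference frame: one
// intra/inter bit, then for inter frames the single-reference tree
// (LAST = 0, GOLDEN = 10, ALTREF = 11).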
static void init_ref_frame_cost(VP9_COMMON *const cm,
                                MACROBLOCKD *const xd,
                                int ref_frame_cost[MAX_REF_FRAMES]) {
  vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
  vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
  vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);

  ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
  ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
    ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1);

  ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
}

typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;

#define RT_INTER_MODES 8
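// Candidate (reference frame, mode) pairs in evaluation order.  Cheap
// candidates (ZEROMV/NEARESTMV) come first so the adaptive RD thresholds
// can prune the expensive NEWMV searches; the SVC variant appears to
// front-load both ZEROMV candidates.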
static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
    {LAST_FRAME, ZEROMV},
    {LAST_FRAME, NEARESTMV},
    {GOLDEN_FRAME, ZEROMV},
    {LAST_FRAME, NEARMV},
    {LAST_FRAME, NEWMV},
    {GOLDEN_FRAME, NEARESTMV},
    {GOLDEN_FRAME, NEARMV},
    {GOLDEN_FRAME, NEWMV}
};
static const REF_MODE ref_mode_set_svc[RT_INTER_MODES] = {
    {LAST_FRAME, ZEROMV},
    {GOLDEN_FRAME, ZEROMV},
    {LAST_FRAME, NEARESTMV},
    {LAST_FRAME, NEARMV},
    {GOLDEN_FRAME, NEARESTMV},
    {GOLDEN_FRAME, NEARMV},
    {LAST_FRAME, NEWMV},
    {GOLDEN_FRAME, NEWMV}
};

// TODO(jingning): Placeholder for inter-frame non-RD mode decision.
// This needs various further optimizations. To be continued.
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                         TileDataEnc *tile_data,
                         int mi_row, int mi_col, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[0];
  PREDICTION_MODE best_mode = ZEROMV;
  MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
  MV_REFERENCE_FRAME usable_ref_frame;
  TX_SIZE best_tx_size = TX_SIZES;
  INTERP_FILTER best_pred_filter = EIGHTTAP;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  RD_COST this_rdc, best_rdc;
  uint8_t skip_txfm = SKIP_TXFM_NONE, best_mode_skip_txfm = SKIP_TXFM_NONE;
  // var_y and sse_y are saved to be used in skipping checking
  unsigned int var_y = UINT_MAX;
  unsigned int sse_y = UINT_MAX;
  // Reduce the intra cost penalty for small blocks (<=16x16).
  const int reduction_fac = (bsize <= BLOCK_16X16) ?
      ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
  const int intra_cost_penalty = vp9_get_intra_cost_penalty(
      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
  const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                           intra_cost_penalty, 0);
  const int *const rd_threshes = cpi->rd.threshes[mbmi->segment_id][bsize];
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
  INTERP_FILTER filter_ref;
  const int bsl = mi_width_log2_lookup[bsize];
  const int pred_filter_search = cm->interp_filter == SWITCHABLE ?
      (((mi_row + mi_col) >> bsl) +
       get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
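  // The filter search runs on a checkerboard of blocks whose phase
  // alternates every frame, halving its cost while still adapting over
  // time.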
  int const_motion[MAX_REF_FRAMES] = { 0 };
  const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
  const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
  // For speed 6, the result of the interp filter search is reused later in
  // the actual encoding process.
  // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
  PRED_BUFFER tmp[4];
  DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
#endif
  struct buf_2d orig_dst = pd->dst;
  PRED_BUFFER *best_pred = NULL;
  PRED_BUFFER *this_mode_pred = NULL;
  const int pixels_in_block = bh * bw;
  int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
  int ref_frame_skip_mask = 0;
  int idx;
  int best_pred_sad = INT_MAX;
  int best_early_term = 0;
  int ref_frame_cost[MAX_REF_FRAMES];

  init_ref_frame_cost(cm, xd, ref_frame_cost);

  if (reuse_inter_pred) {
    int i;
    for (i = 0; i < 3; i++) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth)
        tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
      else
        tmp[i].data = &pred_buf[pixels_in_block * i];
#else
      tmp[i].data = &pred_buf[pixels_in_block * i];
#endif  // CONFIG_VP9_HIGHBITDEPTH
      tmp[i].stride = bw;
      tmp[i].in_use = 0;
    }
    tmp[3].data = pd->dst.buf;
    tmp[3].stride = pd->dst.stride;
    tmp[3].in_use = 0;
  }

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  x->skip = 0;

  if (xd->up_available)
    filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
  else if (xd->left_available)
    filter_ref = xd->mi[-1]->mbmi.interp_filter;
  else
    filter_ref = cm->interp_filter;

  // initialize mode decisions
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(rd_cost);
  mbmi->sb_type = bsize;
  mbmi->ref_frame[0] = NONE;
  mbmi->ref_frame[1] = NONE;
  mbmi->tx_size = VPXMIN(max_txsize_lookup[bsize],
                         tx_mode_to_biggest_tx_size[cm->tx_mode]);

#if CONFIG_VP9_TEMPORAL_DENOISING
  vp9_denoiser_reset_frame_stats(ctx);
#endif

  if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc) {
    usable_ref_frame = LAST_FRAME;
  } else {
    usable_ref_frame = GOLDEN_FRAME;
  }
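  // For each usable reference, set up (possibly scaled) prediction planes
  // and seed the NEARESTMV/NEARMV candidates; unavailable references are
  // added to the skip mask so the mode loop below never touches them.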
  for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);

    x->pred_mv_sad[ref_frame] = INT_MAX;
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;

    if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
      int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
      const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;

      vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
                           sf, sf);

      if (cm->use_prev_frame_mvs)
        vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
                         candidates, mi_row, mi_col, NULL, NULL,
                         x->mbmi_ext->mode_context);
      else
        const_motion[ref_frame] = mv_refs_rt(cm, x, xd, tile_info,
                                             xd->mi[0],
                                             ref_frame, candidates,
                                             mi_row, mi_col);

      vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                            &frame_mv[NEARESTMV][ref_frame],
                            &frame_mv[NEARMV][ref_frame]);

      if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
        vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
                    ref_frame, bsize);
    } else {
      ref_frame_skip_mask |= (1 << ref_frame);
    }
  }

  for (idx = 0; idx < RT_INTER_MODES; ++idx) {
    int rate_mv = 0;
    int mode_rd_thresh;
    int mode_index;
    int i;
    int64_t this_sse;
    int is_skippable;
    int this_early_term = 0;
    PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
    if (cpi->use_svc)
      this_mode = ref_mode_set_svc[idx].pred_mode;

    if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
      continue;

    ref_frame = ref_mode_set[idx].ref_frame;
    if (cpi->use_svc)
      ref_frame = ref_mode_set_svc[idx].ref_frame;
    if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
      continue;
    if (const_motion[ref_frame] && this_mode == NEARMV)
      continue;

    if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) {
      i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
      if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
        if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
          ref_frame_skip_mask |= (1 << ref_frame);
    }
    if (ref_frame_skip_mask & (1 << ref_frame))
      continue;

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];

    mbmi->ref_frame[0] = ref_frame;
    set_ref_ptrs(cm, xd, ref_frame, NONE);

    mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
    mode_rd_thresh = best_mode_skip_txfm ?
            rd_threshes[mode_index] << 1 : rd_threshes[mode_index];
    if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
                            rd_thresh_freq_fact[mode_index]))
      continue;

    if (this_mode == NEWMV) {
      if (ref_frame > LAST_FRAME && !cpi->use_svc) {
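        // A full motion search on a non-LAST reference is too expensive
        // here, so use the coarse integral-projection estimator and keep
        // the result only if its SAD does not exceed the LAST-frame
        // prediction SAD.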
        int tmp_sad;
        int dis, cost_list[5];

        if (bsize < BLOCK_16X16)
          continue;

        tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);

        if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
          continue;
        if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad)
          continue;

        frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
        rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
          &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
          x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
        frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;

        cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
          &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          cpi->sf.mv.subpel_force_stop,
          cpi->sf.mv.subpel_iters_per_step,
          cond_cost_list(cpi, cost_list),
          x->nmvjointcost, x->mvcost, &dis,
          &x->pred_sse[ref_frame], NULL, 0, 0);
      } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
        &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) {
        continue;
      }
    }

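    // Record the full-pel SAD at the new LAST-frame motion vector; it gates
    // subsequent NEWMV searches via the best_pred_sad check above.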
    if (this_mode == NEWMV && ref_frame == LAST_FRAME &&
        frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
      const int pre_stride = xd->plane[0].pre[0].stride;
      const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
          (frame_mv[NEWMV][LAST_FRAME].as_mv.row >> 3) * pre_stride +
          (frame_mv[NEWMV][LAST_FRAME].as_mv.col >> 3);
      best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
                                   x->plane[0].src.stride,
                                   pre_buf, pre_stride);
      x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
    }

    if (cpi->use_svc) {
      if (this_mode == NEWMV && ref_frame == GOLDEN_FRAME &&
          frame_mv[NEWMV][GOLDEN_FRAME].as_int != INVALID_MV) {
        const int pre_stride = xd->plane[0].pre[0].stride;
        const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
            (frame_mv[NEWMV][GOLDEN_FRAME].as_mv.row >> 3) * pre_stride +
            (frame_mv[NEWMV][GOLDEN_FRAME].as_mv.col >> 3);
        best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
                                               x->plane[0].src.stride,
                                               pre_buf, pre_stride);
        x->pred_mv_sad[GOLDEN_FRAME] = best_pred_sad;
      }
    }


    if (this_mode != NEARESTMV &&
        frame_mv[this_mode][ref_frame].as_int ==
            frame_mv[NEARESTMV][ref_frame].as_int)
      continue;

    mbmi->mode = this_mode;
    mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;

    // Search for the best prediction filter type, when the resulting
    // motion vector is at sub-pixel accuracy level for luma component, i.e.,
    // the last three bits are all zeros.
    if (reuse_inter_pred) {
      if (!this_mode_pred) {
        this_mode_pred = &tmp[3];
      } else {
        this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
        pd->dst.buf = this_mode_pred->data;
        pd->dst.stride = bw;
1347
      }
1348 1349 1350
    }

    if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search
1351 1352
        && (ref_frame == LAST_FRAME ||
            (ref_frame == GOLDEN_FRAME && cpi->use_svc))
1353 1354 1355 1356 1357 1358 1359 1360 1361 1362
        && (((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07) != 0)) {
      int pf_rate[3];
      int64_t pf_dist[3];
      unsigned int pf_var[3];
      unsigned int pf_sse[3];
      TX_SIZE pf_tx_size[3];
      int64_t best_cost = INT64_MAX;
      INTERP_FILTER best_filter = SWITCHABLE, filter;
      PRED_BUFFER *current_pred = this_mode_pred;

      for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) {
        int64_t cost;
        mbmi->interp_filter = filter;
        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
                          &pf_var[filter], &pf_sse[filter]);
        pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
        cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
        pf_tx_size[filter] = mbmi->tx_size;
        if (cost < best_cost) {
          best_filter = filter;
          best_cost = cost;
          skip_txfm = x->skip_txfm[0];

          if (reuse_inter_pred) {
            if (this_mode_pred != current_pred) {
              free_pred_buffer(this_mode_pred);
              this_mode_pred = current_pred;
            }
1382

1383 1384 1385 1386
            if (filter < EIGHTTAP_SHARP) {
              current_pred = &tmp[get_pred_buffer(tmp, 3)];
              pd->dst.buf = current_pred->data;
              pd->dst.stride = bw;
1387
            }
1388
          }
1389
        }
1390
      }
1391

1392 1393
      if (reuse_inter_pred && this_mode_pred != current_pred)
        free_pred_buffer(current_pred);
1394

1395
      mbmi->interp_filter = best_filter;
1396 1397 1398 1399 1400
      mbmi->tx_size = pf_tx_size[best_filter];
      this_rdc.r