/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

Dmitry Kovalev's avatar
Dmitry Kovalev committed
11 12 13 14
#include <limits.h>
#include <math.h>
#include <stdio.h>

Jim Bankoski's avatar
Jim Bankoski committed
15
#include "./vp9_rtcd.h"
Johann's avatar
Johann committed
16
#include "./vpx_dsp_rtcd.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
17 18
#include "./vpx_config.h"

19
#include "vpx_dsp/vpx_dsp_common.h"
20
#include "vpx_ports/mem.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
21
#include "vpx_ports/vpx_timer.h"
22
#include "vpx_ports/system_state.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
23

24
#include "vp9/common/vp9_common.h"
Yaowu Xu's avatar
Yaowu Xu committed
25
#include "vp9/common/vp9_entropy.h"
26
#include "vp9/common/vp9_entropymode.h"
27
#include "vp9/common/vp9_idct.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
28 29 30
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
31
#include "vp9/common/vp9_reconintra.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
32
#include "vp9/common/vp9_reconinter.h"
33
#include "vp9/common/vp9_seg_common.h"
34
#include "vp9/common/vp9_tile_common.h"
35

36
#include "vp9/encoder/vp9_aq_360.h"
37
#include "vp9/encoder/vp9_aq_complexity.h"
Marco Paniconi's avatar
Marco Paniconi committed
38 39
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
40 41 42
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
43
#include "vp9/encoder/vp9_ethread.h"
44
#include "vp9/encoder/vp9_extend.h"
45
#include "vp9/encoder/vp9_pickmode.h"
46
#include "vp9/encoder/vp9_rd.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
47 48
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
49
#include "vp9/encoder/vp9_tokenize.h"
50

clang-format's avatar
clang-format committed
51 52 53
// Forward declaration of a file-local function; its definition is not in
// this part of the file.
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
54

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
// Machine learning-based early termination parameters.
// train_mean holds 24 values, written here as three groups of eight; the last
// slot of every group is 0.0.
// NOTE(review): presumably per-feature means used to normalize the classifier
// inputs -- the consumer is not in this part of the file, confirm there.
static const double train_mean[24] = {
  303501.697372, 3042630.372158, 24.694696, 1.392182,
  689.413511,    162.027012,     1.478213,  0.0,
  135382.260230, 912738.513263,  28.845217, 1.515230,
  544.158492,    131.807995,     1.436863,  0.0,
  43682.377587,  208131.711766,  28.084737, 1.356677,
  138.254122,    119.522553,     1.252322,  0.0
};

// 24 values laid out in the same three-groups-of-eight pattern as train_mean,
// again with 0.0 in the last slot of every group.
// NOTE(review): the name suggests a standard-deviation based scale that is
// paired index-by-index with train_mean -- confirm against the consumer.
static const double train_stdm[24] = {
  673689.212982, 5996652.516628, 0.024449, 1.989792,
  985.880847,    0.014638,       2.001898, 0.0,
  208798.775332, 1812548.443284, 0.018693, 1.838009,
  396.986910,    0.015657,       1.332541, 0.0,
  55888.847031,  448587.962714,  0.017900, 1.904776,
  98.652832,     0.016598,       1.320992, 0.0
};

// Error tolerance: 0.01%-0.05%-0.1%
// 24 coefficients, the same count as train_mean and train_stdm.
// NOTE(review): presumably linear classifier weights applied to the
// normalized features -- the consumer is not in this part of the file.
static const double classifiers[24] = {
  0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863,
  0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134,
  0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700,
  0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211
};

82
// Flat row of 64 samples, all equal to 128. It is used as the reference when
// computing the source variance for the purpose of activity masking: the
// variance helpers in this file pass it with a stride of 0.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,  //  0..7
  128, 128, 128, 128, 128, 128, 128, 128,  //  8..15
  128, 128, 128, 128, 128, 128, 128, 128,  // 16..23
  128, 128, 128, 128, 128, 128, 128, 128,  // 24..31
  128, 128, 128, 128, 128, 128, 128, 128,  // 32..39
  128, 128, 128, 128, 128, 128, 128, 128,  // 40..47
  128, 128, 128, 128, 128, 128, 128, 128,  // 48..55
  128, 128, 128, 128, 128, 128, 128, 128   // 56..63
};

94 95
#if CONFIG_VP9_HIGHBITDEPTH
// High bitdepth counterparts of the flat 128-valued reference row: one table
// of 64 identical 16-bit samples per supported bit depth.

// 8-bit samples stored as 16-bit: every entry is 128.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,  //  0..7
  128, 128, 128, 128, 128, 128, 128, 128,  //  8..15
  128, 128, 128, 128, 128, 128, 128, 128,  // 16..23
  128, 128, 128, 128, 128, 128, 128, 128,  // 24..31
  128, 128, 128, 128, 128, 128, 128, 128,  // 32..39
  128, 128, 128, 128, 128, 128, 128, 128,  // 40..47
  128, 128, 128, 128, 128, 128, 128, 128,  // 48..55
  128, 128, 128, 128, 128, 128, 128, 128   // 56..63
};

// 10-bit: every entry is 128 * 4 = 512.
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  512, 512, 512, 512, 512, 512, 512, 512,  //  0..7
  512, 512, 512, 512, 512, 512, 512, 512,  //  8..15
  512, 512, 512, 512, 512, 512, 512, 512,  // 16..23
  512, 512, 512, 512, 512, 512, 512, 512,  // 24..31
  512, 512, 512, 512, 512, 512, 512, 512,  // 32..39
  512, 512, 512, 512, 512, 512, 512, 512,  // 40..47
  512, 512, 512, 512, 512, 512, 512, 512,  // 48..55
  512, 512, 512, 512, 512, 512, 512, 512   // 56..63
};

// 12-bit: every entry is 128 * 16 = 2048.
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,  //  0..7
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,  //  8..15
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,  // 16..23
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,  // 24..31
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,  // 32..39
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,  // 40..47
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,  // 48..55
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048   // 56..63
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

128 129 130
// Returns the variance of the block |ref| of size |bs|, measured against the
// flat VP9_VAR_OFFS row (stride 0) and scaled down to a per-pixel value by
// rounding-dividing by the block's pixel count (a power of two).
unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int unused_sse;  // Required by the variance function; not used.
  const unsigned int block_var = cpi->fn_ptr[bs].vf(
      ref->buf, ref->stride, VP9_VAR_OFFS, 0, &unused_sse);

  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}

137
#if CONFIG_VP9_HIGHBITDEPTH
// High bitdepth version of vp9_get_sby_perpixel_variance(): the flat
// reference row is chosen by bit depth |bd| (10, 12, anything else is treated
// as 8) and the result is the variance rounded down to a per-pixel value.
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  unsigned int unused_sse;  // Required by the variance function; not used.
  unsigned int block_var;
  const uint16_t *flat_ref;

  if (bd == 10) {
    flat_ref = VP9_HIGH_VAR_OFFS_10;
  } else if (bd == 12) {
    flat_ref = VP9_HIGH_VAR_OFFS_12;
  } else {
    // 8-bit and every unrecognized depth share the 8-bit table.
    flat_ref = VP9_HIGH_VAR_OFFS_8;
  }

  block_var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                                 CONVERT_TO_BYTEPTR(flat_ref), 0, &unused_sse);

  return (unsigned int)ROUND64_POWER_OF_TWO((int64_t)block_var,
                                            num_pels_log2_lookup[bs]);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

165
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
166 167
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
168
                                                   BLOCK_SIZE bs) {
169 170
  unsigned int sse, var;
  uint8_t *last_y;
171
  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
172 173 174 175 176

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
177 178 179
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

180
// Maps the 64x64 per-pixel difference variance of the superblock at
// (mi_row, mi_col) to a fixed partition size: the higher the variance, the
// smaller the block size that is returned.
static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  const unsigned int sb_var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);

  if (sb_var >= 2048) return BLOCK_8X8;
  if (sb_var >= 128) return BLOCK_16X16;
  if (sb_var >= 8) return BLOCK_32X32;
  return BLOCK_64X64;
}

194 195
// Lighter version of set_offsets that only sets the mode info
// pointers.
Jingning Han's avatar
Jingning Han committed
196
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
197
                                         MACROBLOCK *const x,
clang-format's avatar
clang-format committed
198
                                         MACROBLOCKD *const xd, int mi_row,
Jingning Han's avatar
Jingning Han committed
199
                                         int mi_col) {
200
  const int idx_str = xd->mi_stride * mi_row + mi_col;
201 202
  xd->mi = cm->mi_grid_visible + idx_str;
  xd->mi[0] = cm->mi + idx_str;
203
  x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
204 205 206
}

static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
207 208
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
209 210
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
Scott LaVarnway's avatar
Scott LaVarnway committed
211
  MODE_INFO *mi;
212 213 214
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const struct segmentation *const seg = &cm->seg;
Alex Converse's avatar
Alex Converse committed
215
  MvLimits *const mv_limits = &x->mv_limits;
216

217
  set_skip_context(xd, mi_row, mi_col);
218

219 220
  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);

Scott LaVarnway's avatar
Scott LaVarnway committed
221
  mi = xd->mi[0];
222 223

  // Set up destination pointers.
224
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
225 226 227

  // Set up limit values for MV components.
  // Mv beyond the range do not produce new/different prediction block.
Alex Converse's avatar
Alex Converse committed
228 229 230 231
  mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
232 233 234

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
clang-format's avatar
clang-format committed
235 236
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);
237 238 239 240 241

  // Set up source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
242 243
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;
244 245 246

  // Setup segment ID.
  if (seg->enabled) {
247
    if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ &&
248
        cpi->oxcf.aq_mode != EQUATOR360_AQ) {
clang-format's avatar
clang-format committed
249 250
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
Scott LaVarnway's avatar
Scott LaVarnway committed
251
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
252 253 254
    }
    vp9_init_plane_quantizers(cpi, x);

Scott LaVarnway's avatar
Scott LaVarnway committed
255
    x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id];
256
  } else {
Scott LaVarnway's avatar
Scott LaVarnway committed
257
    mi->segment_id = 0;
258 259
    x->encode_breakout = cpi->encode_breakout;
  }
Scott LaVarnway's avatar
Scott LaVarnway committed
260 261 262

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
263 264
}

265 266 267
// Points every mode info grid entry covered by the block of size |bsize| at
// (mi_row, mi_col) to the block's first MODE_INFO (xd->mi[0]). The covered
// area is clipped to the frame dimensions in mode info units.
static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int cols =
      VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  const int rows =
      VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  const int stride = xd->mi_stride;
  MODE_INFO *const head = xd->mi[0];
  int r, c;

  for (r = 0; r < rows; ++r) {
    MODE_INFO **const grid_row = xd->mi + r * stride;
    for (c = 0; c < cols; ++c) {
      grid_row[c] = head;
    }
  }
}

clang-format's avatar
clang-format committed
280 281
static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
282 283
                           BLOCK_SIZE bsize) {
  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
284
    set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
Scott LaVarnway's avatar
Scott LaVarnway committed
285
    xd->mi[0]->sb_type = bsize;
286 287 288 289
  }
}

// Accumulated statistics for one region of the variance tree.
typedef struct {
  // This struct is used for computing variance in choose_partitioning(), where
  // the max number of samples within a superblock is 16x16 (with 4x4 avg). The
  // original note on the width of sum_square_error: even in high bitdepth,
  // uint32_t is enough (2^12 * 2^12 * 16 * 16 = 2^32).
  uint32_t sum_square_error;
  int32_t sum_error;
  // log2 of the number of samples accumulated; grows by one each time two
  // regions are merged by sum_2_variances().
  int log2_count;
  // Filled in by get_variance() from the three fields above.
  int variance;
} var;

// Statistics of one block for each way of covering it: as a whole, as two
// horizontal halves, or as two vertical halves.
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

// Leaf level of the tree: the four children are plain var entries.
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

// Each level from here on holds its own statistics plus four children of the
// next smaller level.
typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

// Level-independent view of one tree node, filled in by tree_to_node(): the
// node's own statistics and the "whole block" var of each of its four
// children.
typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

// Identifiers for tree levels.
// NOTE(review): no user of this enum is visible in this part of the file.
typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;

// Fills |node| with pointers into the variance tree |data|, which is
// interpreted according to |bsize| (v64x64 for BLOCK_64X64 down to v4x4 for
// BLOCK_4X4). node->part_variances points at the tree's own statistics and
// node->split[] at the "none" statistics of its four children (at the 4x4
// level, at the four leaf entries themselves). For any other |bsize| the
// function asserts and leaves node->part_variances NULL.
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int child;

  node->part_variances = NULL;

  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *const tree = (v64x64 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child) {
        node->split[child] = &tree->split[child].part_variances.none;
      }
      break;
    }
    case BLOCK_32X32: {
      v32x32 *const tree = (v32x32 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child) {
        node->split[child] = &tree->split[child].part_variances.none;
      }
      break;
    }
    case BLOCK_16X16: {
      v16x16 *const tree = (v16x16 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child) {
        node->split[child] = &tree->split[child].part_variances.none;
      }
      break;
    }
    case BLOCK_8X8: {
      v8x8 *const tree = (v8x8 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child) {
        node->split[child] = &tree->split[child].part_variances.none;
      }
      break;
    }
    case BLOCK_4X4: {
      // Leaf level: the children are var entries, not sub-trees.
      v4x4 *const tree = (v4x4 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child) {
        node->split[child] = &tree->split[child];
      }
      break;
    }
    default: {
      assert(0);
      break;
    }
  }
}

// Set variance values given sum square error, sum error, count.
388
static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
389 390
  v->sum_square_error = s2;
  v->sum_error = s;
391
  v->log2_count = c;
392 393 394
}

// Computes v->variance from the accumulated sums in |v|:
//   256 * (sum_square_error - sum_error^2 / count) / count
// where count is 2^log2_count and both divisions are right shifts.
static void get_variance(var *v) {
  // Square the error sum in 64 bits: sum_error is an int32_t and its square
  // can exceed INT32_MAX, which would be signed overflow (undefined
  // behavior) in 32-bit arithmetic.
  const int64_t sum_error_sq = (int64_t)v->sum_error * v->sum_error;
  // The subtraction and the scaling by 256 stay in uint32_t, as before, so
  // the result is unchanged whenever the 32-bit product did not overflow.
  const uint32_t mean_sq_term = (uint32_t)(sum_error_sq >> v->log2_count);

  v->variance =
      (int)(256 * (v->sum_square_error - mean_sq_term) >> v->log2_count);
}

401
// Merges the statistics of two equally sized regions |a| and |b| into |r|:
// the sums are added and the log2 sample count grows by one.
static void sum_2_variances(const var *a, const var *b, var *r) {
  uint32_t merged_sse;
  int32_t merged_sum;
  int merged_log2_count;

  assert(a->log2_count == b->log2_count);

  merged_sse = a->sum_square_error + b->sum_square_error;
  merged_sum = a->sum_error + b->sum_error;
  merged_log2_count = a->log2_count + 1;
  fill_variance(merged_sse, merged_sum, merged_log2_count, r);
}

// Derives the statistics of the tree node |data| (level given by |bsize|)
// from its four children: first the two horizontal and two vertical halves,
// then the whole block as the sum of the two vertical halves.
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  partition_variance *pv;

  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  pv = node.part_variances;

  // Children are indexed 0 | 1 on the top row and 2 | 3 on the bottom row.
  sum_2_variances(node.split[0], node.split[1], &pv->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &pv->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &pv->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &pv->vert[1]);
  sum_2_variances(&pv->vert[0], &pv->vert[1], &pv->none);
}

clang-format's avatar
clang-format committed
419 420 421 422
// Tries to select a partition for the block of size |bsize| at
// (mi_row, mi_col) from the variance tree node |data|.
// Returns 1 when a block size was set (the whole block, or two vertical or
// horizontal halves) and 0 when the caller has to split further. 0 is always
// returned when |force_split| is 1 or |bsize| is below |bsize_min|.
static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];
  const int half_width = block_width / 2;
  const int half_height = block_height / 2;
  variance_node vt;
  partition_variance *pv;
  int mid_col_in_frame;
  int mid_row_in_frame;

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);
  pv = vt.part_variances;

  if (force_split == 1) return 0;
  if (bsize < bsize_min) return 0;

  // On non-key frames the variance was already computed to set the
  // force_split; on key frames it is computed here.
  if (is_key_frame) get_variance(&pv->none);

  mid_col_in_frame = mi_col + half_width < cm->mi_cols;
  mid_row_in_frame = mi_row + half_height < cm->mi_rows;

  // For key frame: take split for bsize above 32X32 or very high variance.
  // This check does not apply at bsize_min.
  if (bsize > bsize_min && is_key_frame &&
      (bsize > BLOCK_32X32 || pv->none.variance > (threshold << 4))) {
    return 0;
  }

  // If variance is low, take the bsize (no split).
  if (mid_col_in_frame && mid_row_in_frame &&
      pv->none.variance < threshold) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    return 1;
  }

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling) there is no
  // check for vert/horiz split as too few samples for variance: split.
  if (bsize == bsize_min) return 0;

  // Check vertical split.
  if (mid_row_in_frame) {
    const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
    get_variance(&pv->vert[0]);
    get_variance(&pv->vert[1]);
    if (pv->vert[0].variance < threshold &&
        pv->vert[1].variance < threshold &&
        get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row, mi_col + half_width, subsize);
      return 1;
    }
  }

  // Check horizontal split.
  if (mid_col_in_frame) {
    const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
    get_variance(&pv->horz[0]);
    get_variance(&pv->horz[1]);
    if (pv->horz[0].variance < threshold &&
        pv->horz[1].variance < threshold &&
        get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row + half_height, mi_col, subsize);
      return 1;
    }
  }

  return 0;
}

496 497 498
static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int content_state) {
499 500 501 502
  if (speed >= 8) {
    if (width <= 640 && height <= 480)
      return (5 * threshold_base) >> 2;
    else if ((content_state == kLowSadLowSumdiff) ||
503 504
             (content_state == kHighSadLowSumdiff) ||
             (content_state == kLowVarHighSumdiff))
505 506 507
      return (5 * threshold_base) >> 2;
  } else if (speed == 7) {
    if ((content_state == kLowSadLowSumdiff) ||
508 509
        (content_state == kHighSadLowSumdiff) ||
        (content_state == kLowVarHighSumdiff)) {
510 511 512 513 514 515
      return (5 * threshold_base) >> 2;
    }
  }
  return threshold_base;
}

Marco's avatar
Marco committed
516 517 518 519
// Set the variance split thresholds for following the block sizes:
// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frame.
520 521
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
Marco's avatar
Marco committed
522 523 524
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);
  const int threshold_multiplier = is_key_frame ? 20 : 1;
clang-format's avatar
clang-format committed
525 526
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
527

Marco's avatar
Marco committed
528 529 530 531 532 533
  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base >> 2;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base << 2;
  } else {
534 535
    // Increase base variance threshold based on estimated noise level.
    if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
clang-format's avatar
clang-format committed
536 537
      NOISE_LEVEL noise_level =
          vp9_noise_estimate_extract_level(&cpi->noise_estimate);
538
      if (noise_level == kHigh)
539
        threshold_base = 3 * threshold_base;
540 541
      else if (noise_level == kMedium)
        threshold_base = threshold_base << 1;
542
      else if (noise_level < kLow)
543
        threshold_base = (7 * threshold_base) >> 3;
544
    }
545
#if CONFIG_VP9_TEMPORAL_DENOISING
546 547
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
        cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
548 549 550
      threshold_base =
          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
551
    else
552
      threshold_base =
553 554
          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
                                    cm->height, content_state);
555
#else
556 557 558
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
559
#endif
560 561
    thresholds[0] = threshold_base;
    thresholds[2] = threshold_base << cpi->oxcf.speed;
562 563
    if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
      thresholds[2] = thresholds[2] << 1;
Marco's avatar
Marco committed
564
    if (cm->width <= 352 && cm->height <= 288) {
565 566
      thresholds[0] = threshold_base >> 3;
      thresholds[1] = threshold_base >> 1;
567
      thresholds[2] = threshold_base << 3;
568
    } else if (cm->width < 1280 && cm->height < 720) {
Marco's avatar
Marco committed
569
      thresholds[1] = (5 * threshold_base) >> 2;
570 571 572 573
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[1] = threshold_base << 1;
    } else {
      thresholds[1] = (5 * threshold_base) >> 1;
Marco's avatar
Marco committed
574 575 576 577
    }
  }
}

578 579
// Updates the variance based partitioning thresholds stored in |cpi| for
// quantizer index |q|. Nothing is changed unless the partition search type is
// VAR_BASED_PARTITION or REFERENCE_PARTITION.
void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
                                           int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);

  if (sf->partition_search_type != VAR_BASED_PARTITION &&
      sf->partition_search_type != REFERENCE_PARTITION) {
    return;
  }

  set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);

  // The thresholds below are not changed locally.
  if (is_key_frame) {
    cpi->vbp_threshold_sad = 0;
    cpi->vbp_threshold_copy = 0;
    cpi->vbp_bsize_min = BLOCK_8X8;
  } else {
    cpi->vbp_bsize_min = BLOCK_16X16;
    if (cm->width <= 352 && cm->height <= 288) {
      cpi->vbp_threshold_sad = 10;
      cpi->vbp_threshold_copy = 4000;
    } else {
      // Twice the dequant value, but never below 1000.
      const int dequant_x2 = cpi->y_dequant[q][1] << 1;
      cpi->vbp_threshold_sad = dequant_x2 > 1000 ? dequant_x2 : 1000;

      if (cm->width <= 640 && cm->height <= 360) {
        cpi->vbp_threshold_copy = 8000;
      } else {
        // Eight times the dequant value, but never below 8000.
        const int dequant_x8 = cpi->y_dequant[q][1] << 3;
        cpi->vbp_threshold_copy = dequant_x8 > 8000 ? dequant_x8 : 8000;
      }
    }
  }

  cpi->vbp_threshold_minmax = 15 + (q >> 3);
}

614 615 616 617 618 619
// Compute the minmax over the 8x8 subblocks.
// For each of the four 8x8 sub-blocks of the 16x16 block at
// (x16_idx, y16_idx) that starts inside the pixels_wide x pixels_high area,
// a (min, max) pair is obtained for |s| versus |d|. The return value is the
// largest (max - min) minus the smallest (max - min) over those sub-blocks.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int largest_range = 0;
  int smallest_range = 255;
  int k;

  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    const int x8_idx = x16_idx + ((k & 1) << 3);
    const int y8_idx = y16_idx + ((k >> 1) << 3);
    int min = 0;
    int max = 0;

    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      const uint8_t *const src8 = s + y8_idx * sp + x8_idx;
      const uint8_t *const ref8 = d + y8_idx * dp + x8_idx;
      int range;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_minmax_8x8(src8, sp, ref8, dp, &min, &max);
      } else {
        vpx_minmax_8x8(src8, sp, ref8, dp, &min, &max);
      }
#else
      vpx_minmax_8x8(src8, sp, ref8, dp, &min, &max);
#endif
      range = max - min;
      if (range > largest_range) largest_range = range;
      if (range < smallest_range) smallest_range = range;
    }
  }

  return largest_range - smallest_range;
}

650 651 652 653 654
// Fills the four leaves of the 8x8 node |vst| from 4x4 averages. For each
// 4x4 sub-block of the 8x8 block at (x8_idx, y8_idx), the stored error is the
// average of |s| minus the average of |d| (128 is used in place of the |d|
// average on key frames). Sub-blocks that start outside the
// pixels_wide x pixels_high area are stored with zero sums.
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;

  for (k = 0; k < 4; k++) {
    const int x4_idx = x8_idx + ((k & 1) << 2);
    const int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;

    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      const uint8_t *const src4 = s + y4_idx * sp + x4_idx;
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_4x4(src4, sp);
        if (!is_key_frame) {
          d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
        }
      } else {
        s_avg = vpx_avg_4x4(src4, sp);
        if (!is_key_frame) {
          d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
        }
      }
#else
      s_avg = vpx_avg_4x4(src4, sp);
      if (!is_key_frame) {
        d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      }
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }

    // One sample per leaf, hence a log2 count of 0.
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

// Fills the four 8x8 leaves of the 16x16 variance node |vst| located at
// (x16_idx, y16_idx). For every 8x8 sub-block whose top-left corner lies
// inside the pixels_wide x pixels_high area, the recorded "sum" is the
// difference between the source average (from |s|, stride |sp|) and the
// reference average (from |d|, stride |dp|), and the recorded "sse" is that
// difference squared. On key frames the reference buffer is not read and 128
// is used as the reference average. Sub-blocks that start outside the area get
// sum = sse = 0.
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int quad;
  for (quad = 0; quad < 4; ++quad) {
    // Bit 0 of the quadrant index selects the column, bit 1 the row.
    const int col = x16_idx + ((quad & 1) << 3);
    const int row = y16_idx + ((quad >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (col < pixels_wide && row < pixels_high) {
      const uint8_t *const src = s + row * sp + col;
      int src_mean;
      int ref_mean = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      const int is_highbd = (highbd_flag & YV12_FLAG_HIGHBITDEPTH) != 0;
      src_mean = is_highbd ? vpx_highbd_avg_8x8(src, sp) : vpx_avg_8x8(src, sp);
      if (!is_key_frame) {
        const uint8_t *const ref = d + row * dp + col;
        ref_mean =
            is_highbd ? vpx_highbd_avg_8x8(ref, dp) : vpx_avg_8x8(ref, dp);
      }
#else
      src_mean = vpx_avg_8x8(src, sp);
      if (!is_key_frame) {
        ref_mean = vpx_avg_8x8(d + row * dp + col, dp);
      }
#endif
      sum = src_mean - ref_mean;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[quad].part_variances.none);
  }
}

JackyChen's avatar
JackyChen committed
722 723 724 725
// Check if most of the superblock is skin content, and if so, force split to
// 32x32.
//
// The decision is made from cpi->skin_map, sampled at every other mode-info
// row/column of the superblock starting at (mi_row, mi_col), i.e. once per
// 16x16 block. If more than 12 of those samples are flagged as skin,
// *force_split is set to 1 and 1 is returned. In every other case 0 is
// returned and *force_split is left untouched.
//
// This function does not write x->sb_is_skin itself; the return value is
// presumably what the caller stores there for mode selection (verify against
// the caller). |x| is kept only for interface compatibility.
static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
                         int mi_row, int mi_col, int *force_split) {
  VP9_COMMON *const cm = &cpi->common;
  // The source planes reachable through |x| are not needed: the per-block
  // skin decisions are read from cpi->skin_map.
  (void)x;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) return 0;
#endif
  // Avoid checking superblocks on/near boundary and avoid low resolutions.
  // Note superblock may still pick 64X64 if y_sad is very small
  // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
  if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
                   mi_row + 8 < cm->mi_rows)) {
    int num_16x16_skin = 0;
    int num_16x16_nonskin = 0;
    const int block_index = mi_row * cm->mi_cols + mi_col;
    const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
    const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
    const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
    const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
    // Loop through the 16x16 sub-blocks.
    int i, j;
    for (i = 0; i < ymis; i += 2) {
      for (j = 0; j < xmis; j += 2) {
        const int bl_index = block_index + i * cm->mi_cols + j;
        const int is_skin = cpi->skin_map[bl_index];
        num_16x16_skin += is_skin;
        num_16x16_nonskin += (1 - is_skin);
        if (num_16x16_nonskin > 3) {
          // Exit both loops if at least 4 of the 16x16 blocks are not skin:
          // the skin count can then no longer exceed 12.
          i = ymis;
          break;
        }
      }
    }
    if (num_16x16_skin > 12) {
      *force_split = 1;
      return 1;
    }
  }
  return 0;
}

clang-format's avatar
clang-format committed
776
static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
777
                                  v64x64 *vt, int64_t thresholds[],
778 779 780
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  int i, j;
clang-format's avatar
clang-format committed
781
  VP9_COMMON *const cm = &cpi->common;
782 783 784 785 786 787 788 789 790 791 792
  const int mv_thr = cm->width > 640 ? 8 : 4;
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
  // int_pro mv is small. If the temporal variance is small set the flag
  // variance_low for the block. The variance threshold can be adjusted, the
  // higher the more aggressive.
  if (ref_frame_partition == LAST_FRAME &&
      (cpi->sf.short_circuit_low_temp_var == 1 ||
       (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
        xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
        xd->mi[0]->mv[0].as_mv.row < mv_thr &&
        xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
793 794 795
    if (xd->mi[0]->sb_type == BLOCK_64X64) {
      if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
796 797 798 799 800 801 802 803 804 805 806 807
    } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 1] = 1;
      }
    } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 3] = 1;
      }
    } else {
      for (i = 0; i < 4; i++) {
808 809 810 811 812 813 814 815 816 817
        const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
        const int idx_str =
            cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
        MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;

        if (cm->mi_cols <= mi_col + idx[i][1] ||
            cm->mi_rows <= mi_row + idx[i][0])
          continue;

        if ((*this_mi)->sb_type == BLOCK_32X32) {
818 819
          int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
                                     cpi->sf.short_circuit_low_temp_var == 3)
820
                                        ? ((5 * thresholds[1]) >> 3)
821 822
                                        : (thresholds[1] >> 1);
          if (vt->split[i].part_variances.none.variance < threshold_32x32)
823
            x->variance_low[i + 5] = 1;
824
        } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
          // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
          // inside.
          if ((*this_mi)->sb_type == BLOCK_16X16 ||
              (*this_mi)->sb_type == BLOCK_32X16 ||
              (*this_mi)->sb_type == BLOCK_16X32) {
            for (j = 0; j < 4; j++) {
              if (vt->split[i].split[j].part_variances.none.variance <
                  (thresholds[2] >> 8))
                x->variance_low[(i << 2) + j + 9] = 1;
            }
          }
        }
      }
    }
  }
}

842