vp9_encodeframe.c 187 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

Dmitry Kovalev's avatar
Dmitry Kovalev committed
11 12 13 14
#include <limits.h>
#include <math.h>
#include <stdio.h>

Jim Bankoski's avatar
Jim Bankoski committed
15
#include "./vp9_rtcd.h"
Johann's avatar
Johann committed
16
#include "./vpx_dsp_rtcd.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
17 18
#include "./vpx_config.h"

19
#include "vpx_dsp/vpx_dsp_common.h"
20
#include "vpx_ports/mem.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
21
#include "vpx_ports/vpx_timer.h"
22
#include "vpx_ports/system_state.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
23

24
#include "vp9/common/vp9_common.h"
Yaowu Xu's avatar
Yaowu Xu committed
25
#include "vp9/common/vp9_entropy.h"
26
#include "vp9/common/vp9_entropymode.h"
27
#include "vp9/common/vp9_idct.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
28 29 30
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
31
#include "vp9/common/vp9_reconintra.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
32
#include "vp9/common/vp9_reconinter.h"
33
#include "vp9/common/vp9_seg_common.h"
34
#include "vp9/common/vp9_tile_common.h"
35

36
#include "vp9/encoder/vp9_aq_360.h"
37
#include "vp9/encoder/vp9_aq_complexity.h"
Marco Paniconi's avatar
Marco Paniconi committed
38 39
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
40 41 42
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
43
#include "vp9/encoder/vp9_ethread.h"
44
#include "vp9/encoder/vp9_extend.h"
45
#include "vp9/encoder/vp9_pickmode.h"
46
#include "vp9/encoder/vp9_rd.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
47 48
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
49
#include "vp9/encoder/vp9_tokenize.h"
50

clang-format's avatar
clang-format committed
51 52 53
// Forward declaration of a file-local function, so that code placed before its
// definition can call it.
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
54

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
// Machine learning-based early termination parameters.
// 24 values written as three consecutive groups of eight; the eighth entry of
// every group is 0.0.
// NOTE(review): presumably the per-feature training means used to normalize
// inputs to the early termination model -- confirm against the consumer.
static const double train_mean[24] = {
  303501.697372, 3042630.372158, 24.694696, 1.392182,
  689.413511,    162.027012,     1.478213,  0.0,
  135382.260230, 912738.513263,  28.845217, 1.515230,
  544.158492,    131.807995,     1.436863,  0.0,
  43682.377587,  208131.711766,  28.084737, 1.356677,
  138.254122,    119.522553,     1.252322,  0.0
};

// 24 values in the same three-groups-of-eight layout as train_mean; the eighth
// entry of every group is 0.0.
// NOTE(review): presumably the per-feature spread (standard deviation) paired
// with train_mean -- confirm how the consumer combines the two tables.
static const double train_stdm[24] = {
  673689.212982, 5996652.516628, 0.024449, 1.989792,
  985.880847,    0.014638,       2.001898, 0.0,
  208798.775332, 1812548.443284, 0.018693, 1.838009,
  396.986910,    0.015657,       1.332541, 0.0,
  55888.847031,  448587.962714,  0.017900, 1.904776,
  98.652832,     0.016598,       1.320992, 0.0
};

// Error tolerance: 0.01%-0.05%-0.1%
// 24 classifier coefficients for the early termination model.
// NOTE(review): the table has the same length as train_mean/train_stdm, so it
// presumably shares their three-groups-of-eight indexing even though it is
// printed six values per row here -- confirm against the consumer.
static const double classifiers[24] = {
  0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863,
  0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134,
  0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700,
  0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211
};

82
// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
//
// All 64 entries hold the value 128.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

94 95
#if CONFIG_VP9_HIGHBITDEPTH
// 16-bit-sample counterpart of the flat reference block for bit depth 8:
// all 64 entries hold the value 128.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

// Flat reference block for bit depth 10: all 64 entries hold 128 * 4 (512).
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

// Flat reference block for bit depth 12: all 64 entries hold 128 * 16 (2048).
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

128 129 130
// Returns the variance of the block `ref` of size `bs`, measured against the
// flat reference VP9_VAR_OFFS (passed with stride 0) and then scaled down by a
// rounded right shift of num_pels_log2_lookup[bs] bits.
unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int sse;  // Written by vf(); not used by this function.
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

137
#if CONFIG_VP9_HIGHBITDEPTH
clang-format's avatar
clang-format committed
138 139 140
// High bit depth variant of vp9_get_sby_perpixel_variance(). The flat
// reference table is selected by `bd` (10 or 12; any other value, including 8,
// uses the 8-bit table) and passed through CONVERT_TO_BYTEPTR() with stride 0.
// The variance is then scaled down by a rounded 64-bit right shift of
// num_pels_log2_lookup[bs] bits.
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  const uint16_t *offs;
  unsigned int var, sse;  // sse is written by vf() but not used here.

  switch (bd) {
    case 10: offs = VP9_HIGH_VAR_OFFS_10; break;
    case 12: offs = VP9_HIGH_VAR_OFFS_12; break;
    case 8:
    default: offs = VP9_HIGH_VAR_OFFS_8; break;
  }
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, CONVERT_TO_BYTEPTR(offs), 0,
                           &sse);
  return (unsigned int)ROUND64_POWER_OF_TWO((int64_t)var,
                                            num_pels_log2_lookup[bs]);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

165
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
166 167
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
168
                                                   BLOCK_SIZE bs) {
169 170
  unsigned int sse, var;
  uint8_t *last_y;
171
  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
172 173 174 175 176

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
177 178 179
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

180
// Chooses a fixed partition size from the 64x64 variance between the source
// block in x->plane[0].src and the LAST_FRAME buffer at (mi_row, mi_col):
//   var < 8     -> BLOCK_64X64
//   var < 128   -> BLOCK_32X32
//   var < 2048  -> BLOCK_16X16
//   otherwise   -> BLOCK_8X8
static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  const unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);

  if (var < 8) return BLOCK_64X64;
  if (var < 128) return BLOCK_32X32;
  if (var < 2048) return BLOCK_16X16;
  return BLOCK_8X8;
}

194 195
// Lighter version of set_offsets that only sets the mode info
// pointers.
Jingning Han's avatar
Jingning Han committed
196
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
197
                                         MACROBLOCK *const x,
clang-format's avatar
clang-format committed
198
                                         MACROBLOCKD *const xd, int mi_row,
Jingning Han's avatar
Jingning Han committed
199
                                         int mi_col) {
200
  const int idx_str = xd->mi_stride * mi_row + mi_col;
201 202
  xd->mi = cm->mi_grid_visible + idx_str;
  xd->mi[0] = cm->mi + idx_str;
203
  x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
204 205 206
}

static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
207 208
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
209 210
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
Scott LaVarnway's avatar
Scott LaVarnway committed
211
  MODE_INFO *mi;
212 213 214
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const struct segmentation *const seg = &cm->seg;
Alex Converse's avatar
Alex Converse committed
215
  MvLimits *const mv_limits = &x->mv_limits;
216

217
  set_skip_context(xd, mi_row, mi_col);
218

219 220
  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);

Scott LaVarnway's avatar
Scott LaVarnway committed
221
  mi = xd->mi[0];
222 223

  // Set up destination pointers.
224
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
225 226 227

  // Set up limit values for MV components.
  // Mv beyond the range do not produce new/different prediction block.
Alex Converse's avatar
Alex Converse committed
228 229 230 231
  mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
232 233 234

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
clang-format's avatar
clang-format committed
235 236
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);
237 238 239 240 241

  // Set up source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
242 243
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;
244 245 246

  // Setup segment ID.
  if (seg->enabled) {
247
    if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ &&
248
        cpi->oxcf.aq_mode != EQUATOR360_AQ) {
clang-format's avatar
clang-format committed
249 250
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
Scott LaVarnway's avatar
Scott LaVarnway committed
251
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
252 253 254
    }
    vp9_init_plane_quantizers(cpi, x);

Scott LaVarnway's avatar
Scott LaVarnway committed
255
    x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id];
256
  } else {
Scott LaVarnway's avatar
Scott LaVarnway committed
257
    mi->segment_id = 0;
258 259
    x->encode_breakout = cpi->encode_breakout;
  }
Scott LaVarnway's avatar
Scott LaVarnway committed
260 261 262

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
263 264
}

265 266 267
// Makes every mode-info grid entry covered by the block of size `bsize` at
// (mi_row, mi_col) point to the block's first MODE_INFO (xd->mi[0]). The
// covered area is clipped to the frame using cm->mi_cols / cm->mi_rows.
static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int block_width =
      VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  const int block_height =
      VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  const int mi_stride = xd->mi_stride;
  MODE_INFO *const src_mi = xd->mi[0];
  int i, j;

  for (j = 0; j < block_height; ++j) {
    for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
  }
}

clang-format's avatar
clang-format committed
280 281
static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
282 283
                           BLOCK_SIZE bsize) {
  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
284
    set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
Scott LaVarnway's avatar
Scott LaVarnway committed
285
    xd->mi[0]->sb_type = bsize;
286 287 288 289 290 291
  }
}

// Accumulated statistics for one node of the variance tree.
typedef struct {
  int64_t sum_square_error;  // Sum of squared errors.
  int64_t sum_error;         // Sum of errors.
  int log2_count;            // Shift used as the divisor by get_variance();
                             // sum_2_variances() increases it by one.
  int variance;              // Result written by get_variance().
} var;

// Statistics for one block under each candidate partitioning: the whole block
// (none), its two horz[] halves and its two vert[] halves.
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

// Lowest level of the variance tree: its four children are plain `var` leaves.
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

// Variance tree node whose four children are v4x4 nodes.
typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

// Variance tree node whose four children are v8x8 nodes.
typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

// Variance tree node whose four children are v16x16 nodes.
typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

// Root of the variance tree: its four children are v32x32 nodes.
typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

// Type-erased view of one tree node, filled in by tree_to_node():
// part_variances points at the node's own statistics and split[i] at the
// statistics of its i-th child.
typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

// Names for the 16x16, 32x32 and 64x64 levels of the variance tree.
typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;

// Fills `node` with pointers into the variance tree node `data`.
//
// `data` must point to the tree struct that matches `bsize` (v64x64 for
// BLOCK_64X64 down to v4x4 for BLOCK_4X4). node->part_variances is set to the
// node's own statistics and node->split[i] to the `none` statistics of child i
// (for a v4x4 the children are the `var` leaves themselves). Any other bsize
// asserts, leaves node->part_variances NULL and does not write node->split[].
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int i;
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_4X4: {
      v4x4 *vt = (v4x4 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
      break;
    }
    default: {
      assert(0);
      break;
    }
  }
}

// Set variance values given sum square error, sum error, count.
// `c` is stored as the log2 count; v->variance is not touched.
static void fill_variance(int64_t s2, int64_t s, int c, var *v) {
  v->sum_square_error = s2;
  v->sum_error = s;
  v->log2_count = c;
}

// Computes v->variance from the accumulated sums. With N = 2^log2_count this
// is 256 * (sum_square_error - sum_error^2 / N) / N, with both divisions done
// as right shifts and the result truncated to int.
static void get_variance(var *v) {
  v->variance =
      (int)(256 * (v->sum_square_error -
                   ((v->sum_error * v->sum_error) >> v->log2_count)) >>
            v->log2_count);
}

397
// Combines the sums of `a` and `b` into `r`. Both inputs must have the same
// log2_count; the result's log2_count is one higher. r->variance is not
// computed here.
static void sum_2_variances(const var *a, const var *b, var *r) {
  assert(a->log2_count == b->log2_count);
  fill_variance(a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->log2_count + 1, r);
}

// Derives the horz[], vert[] and none sums of the tree node `data` (of type
// matching `bsize`, see tree_to_node()) from the sums of its four children.
// Only the sums are filled in; variances are computed later by get_variance().
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  // split[] is used as 0/1 = top pair and 2/3 = bottom pair for horz[], and
  // 0/2 = left pair and 1/3 = right pair for vert[].
  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
                  &node.part_variances->none);
}

clang-format's avatar
clang-format committed
415 416 417 418
// Tries to select a partition for the block of size `bsize` at
// (mi_row, mi_col) from the variance tree node `data`.
//
// Returns 1 when a block size was chosen and recorded with set_block_size()
// (the whole block, or two vertical / horizontal halves), and 0 when nothing
// was chosen: force_split is 1, the variance tests failed, or bsize is below
// bsize_min.
static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  if (force_split == 1) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (cm->frame_type == KEY_FRAME &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }

    // Check vertical split.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }

    return 0;
  }
  return 0;
}

492 493 494
static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int content_state) {
495 496 497 498
  if (speed >= 8) {
    if (width <= 640 && height <= 480)
      return (5 * threshold_base) >> 2;
    else if ((content_state == kLowSadLowSumdiff) ||
499 500
             (content_state == kHighSadLowSumdiff) ||
             (content_state == kLowVarHighSumdiff))
501 502 503
      return (5 * threshold_base) >> 2;
  } else if (speed == 7) {
    if ((content_state == kLowSadLowSumdiff) ||
504 505
        (content_state == kHighSadLowSumdiff) ||
        (content_state == kLowVarHighSumdiff)) {
506 507 508 509 510 511
      return (5 * threshold_base) >> 2;
    }
  }
  return threshold_base;
}

Marco's avatar
Marco committed
512 513 514 515
// Set the variance split thresholds for following the block sizes:
// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frame.
516 517
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
Marco's avatar
Marco committed
518 519 520
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);
  const int threshold_multiplier = is_key_frame ? 20 : 1;
clang-format's avatar
clang-format committed
521 522
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
523

Marco's avatar
Marco committed
524 525 526 527 528 529
  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base >> 2;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base << 2;
  } else {
530 531
    // Increase base variance threshold based on estimated noise level.
    if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
clang-format's avatar
clang-format committed
532 533
      NOISE_LEVEL noise_level =
          vp9_noise_estimate_extract_level(&cpi->noise_estimate);
534
      if (noise_level == kHigh)
535
        threshold_base = 3 * threshold_base;
536 537
      else if (noise_level == kMedium)
        threshold_base = threshold_base << 1;
538
      else if (noise_level < kLow)
539
        threshold_base = (7 * threshold_base) >> 3;
540
    }
541
#if CONFIG_VP9_TEMPORAL_DENOISING
542 543
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
        cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
544 545 546
      threshold_base =
          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
547
    else
548
      threshold_base =
549 550
          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
                                    cm->height, content_state);
551
#else
552 553 554
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
555
#endif
556 557
    thresholds[0] = threshold_base;
    thresholds[2] = threshold_base << cpi->oxcf.speed;
558 559
    if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
      thresholds[2] = thresholds[2] << 1;
Marco's avatar
Marco committed
560
    if (cm->width <= 352 && cm->height <= 288) {
561 562
      thresholds[0] = threshold_base >> 3;
      thresholds[1] = threshold_base >> 1;
563
      thresholds[2] = threshold_base << 3;
564
    } else if (cm->width < 1280 && cm->height < 720) {
Marco's avatar
Marco committed
565
      thresholds[1] = (5 * threshold_base) >> 2;
566 567 568 569
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[1] = threshold_base << 1;
    } else {
      thresholds[1] = (5 * threshold_base) >> 1;
Marco's avatar
Marco committed
570 571 572 573
    }
  }
}

574 575
// Updates the variance based partitioning state in `cpi` for quantizer index
// `q`: cpi->vbp_thresholds[] (via set_vbp_thresholds()), vbp_threshold_sad,
// vbp_threshold_copy, vbp_bsize_min and vbp_threshold_minmax. Does nothing
// unless the partition search type is VAR_BASED_PARTITION or
// REFERENCE_PARTITION.
void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
                                           int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);

  if (sf->partition_search_type != VAR_BASED_PARTITION &&
      sf->partition_search_type != REFERENCE_PARTITION) {
    return;
  }

  set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
  // The thresholds below are not changed locally.
  if (is_key_frame) {
    cpi->vbp_threshold_sad = 0;
    cpi->vbp_threshold_copy = 0;
    cpi->vbp_bsize_min = BLOCK_8X8;
  } else {
    const int sad_from_q = cpi->y_dequant[q][1] << 1;
    const int copy_from_q = cpi->y_dequant[q][1] << 3;

    // SAD threshold: fixed for frames up to 352x288, otherwise derived from
    // the dequantizer value with a floor of 1000.
    if (cm->width <= 352 && cm->height <= 288)
      cpi->vbp_threshold_sad = 10;
    else
      cpi->vbp_threshold_sad = sad_from_q > 1000 ? sad_from_q : 1000;
    cpi->vbp_bsize_min = BLOCK_16X16;
    // Copy threshold: fixed for the two smallest resolution classes,
    // otherwise derived from the dequantizer value with a floor of 8000.
    if (cm->width <= 352 && cm->height <= 288)
      cpi->vbp_threshold_copy = 4000;
    else if (cm->width <= 640 && cm->height <= 360)
      cpi->vbp_threshold_copy = 8000;
    else
      cpi->vbp_threshold_copy = copy_from_q > 8000 ? copy_from_q : 8000;
  }
  cpi->vbp_threshold_minmax = 15 + (q >> 3);
}

610 611 612 613 614 615
// Compute the minmax over the 8x8 subblocks.
//
// For each of the four 8x8 sub-blocks of the 16x16 block at
// (x16_idx, y16_idx) whose top-left corner lies inside
// pixels_wide x pixels_high, the min and max reported by the minmax routine
// for `s` versus `d` are read and their range (max - min) is formed. The
// return value is the largest range minus the smallest range.
//
// NOTE(review): minmax_min starts at 255, so when no sub-block is inside the
// frame the function returns 0 - 255 = -255, and a range above 255 can never
// lower minmax_min. TODO confirm whether the high bitdepth path can report
// ranges above 255.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int k;
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    int min = 0;
    int max = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                              d + y8_idx * dp + x8_idx, dp, &min, &max);
      } else {
        vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
                       dp, &min, &max);
      }
#else
      vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                     &min, &max);
#endif
      if ((max - min) > minmax_max) minmax_max = (max - min);
      if ((max - min) < minmax_min) minmax_min = (max - min);
    }
  }
  return (minmax_max - minmax_min);
}

646 647 648 649 650
// Fills the four children of the 8x8 tree node `vst` from 4x4 averages.
//
// For each 4x4 sub-block of the 8x8 block at (x8_idx, y8_idx) whose top-left
// corner lies inside pixels_wide x pixels_high, the error is the average of
// `s` minus the average of `d`; on key frames `d` is not read and 128 is used
// in its place. Sub-blocks outside that area get a zero error. Each child's
// `none` entry receives sse = error^2, sum = error and a log2 count of 0.
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x4_idx = x8_idx + ((k & 1) << 2);
    int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      } else {
        s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      }
#else
      s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

// Populates the four 8x8 leaves (vst->split[0..3]) of one 16x16 variance node.
//
// s / sp: source buffer and its stride.
// d / dp: reference buffer and its stride; only read when !is_key_frame.
// x16_idx, y16_idx: pixel offset of the 16x16 block inside the buffers.
// pixels_wide, pixels_high: limits on the pixel offsets; an 8x8 block whose
//   top-left corner is at or beyond either limit is recorded with sum = 0 and
//   sse = 0.
// is_key_frame: when nonzero the reference average is fixed at 128.
//
// For every in-range 8x8 block, sum is the difference between the values
// returned by the 8x8 averaging routine for source and reference, and sse is
// that difference squared. Both are handed to fill_variance() with a third
// argument of 0.
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int blk;
  for (blk = 0; blk < 4; ++blk) {
    // Bit 0 of blk selects the column and bit 1 the row, in steps of 8 pixels.
    const int col = x16_idx + ((blk & 1) << 3);
    const int row = y16_idx + ((blk >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (col < pixels_wide && row < pixels_high) {
      const uint8_t *const src = s + row * sp + col;
      int src_avg;
      int ref_avg = 128;  // Kept as-is when is_key_frame is set.
#if CONFIG_VP9_HIGHBITDEPTH
      const int use_highbd = (highbd_flag & YV12_FLAG_HIGHBITDEPTH) != 0;
      src_avg = use_highbd ? vpx_highbd_avg_8x8(src, sp) : vpx_avg_8x8(src, sp);
      if (!is_key_frame) {
        const uint8_t *const ref = d + row * dp + col;
        ref_avg =
            use_highbd ? vpx_highbd_avg_8x8(ref, dp) : vpx_avg_8x8(ref, dp);
      }
#else
      src_avg = vpx_avg_8x8(src, sp);
      if (!is_key_frame) ref_avg = vpx_avg_8x8(d + row * dp + col, dp);
#endif
      sum = src_avg - ref_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[blk].part_variances.none);
  }
}

JackyChen's avatar
JackyChen committed
718 719 720 721
// Check if most of the superblock is skin content, and if so, force a split.
//
// The decision is taken purely from cpi->skin_map: the map is sampled once per
// 16x16 sub-block (every second 8x8 mode-info unit in each direction) of the
// superblock at (mi_row, mi_col).
//
// Returns 1 and stores 1 in *force_split when more than 12 sampled sub-blocks
// are flagged as skin. Otherwise returns 0 and leaves *force_split untouched.
// Always returns 0 when high bitdepth is in use, when low_res is nonzero, or
// when the superblock lies within 8 mode-info units of a frame edge.
//
// x is not used here; in particular this function does not itself write
// x->sb_is_skin. The parameter is kept so existing callers are unaffected.
static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
                         int mi_row, int mi_col, int *force_split) {
  VP9_COMMON *const cm = &cpi->common;
  (void)x;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) return 0;
#endif
  // Avoid checking superblocks on/near boundary and avoid low resolutions.
  // Note superblock may still pick 64X64 if y_sad is very small
  // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
  if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
                   mi_row + 8 < cm->mi_rows)) {
    int num_16x16_skin = 0;
    int num_16x16_nonskin = 0;
    const int block_index = mi_row * cm->mi_cols + mi_col;
    const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
    const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
    const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
    const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
    // Loop through the 16x16 sub-blocks.
    int i, j;
    for (i = 0; i < ymis; i += 2) {
      for (j = 0; j < xmis; j += 2) {
        const int bl_index = block_index + i * cm->mi_cols + j;
        const int is_skin = cpi->skin_map[bl_index];
        num_16x16_skin += is_skin;
        num_16x16_nonskin += (1 - is_skin);
        if (num_16x16_nonskin > 3) {
          // Exit both loops once at least 4 of the 16x16 blocks are not skin:
          // setting i to ymis makes the outer loop condition fail as well.
          i = ymis;
          break;
        }
      }
    }
    if (num_16x16_skin > 12) {
      *force_split = 1;
      return 1;
    }
  }
  return 0;
}

clang-format's avatar
clang-format committed
772
static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
773
                                  v64x64 *vt, int64_t thresholds[],
774 775 776
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  int i, j;
clang-format's avatar
clang-format committed
777
  VP9_COMMON *const cm = &cpi->common;
778 779 780 781 782 783 784 785 786 787 788
  const int mv_thr = cm->width > 640 ? 8 : 4;
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
  // int_pro mv is small. If the temporal variance is small set the flag
  // variance_low for the block. The variance threshold can be adjusted, the
  // higher the more aggressive.
  if (ref_frame_partition == LAST_FRAME &&
      (cpi->sf.short_circuit_low_temp_var == 1 ||
       (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
        xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
        xd->mi[0]->mv[0].as_mv.row < mv_thr &&
        xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
789 790 791
    if (xd->mi[0]->sb_type == BLOCK_64X64) {
      if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
792 793 794 795 796 797 798 799 800 801 802 803
    } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 1] = 1;
      }
    } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 3] = 1;
      }
    } else {
      for (i = 0; i < 4; i++) {
804 805 806 807 808 809 810 811 812 813
        const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
        const int idx_str =
            cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
        MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;

        if (cm->mi_cols <= mi_col + idx[i][1] ||
            cm->mi_rows <= mi_row + idx[i][0])
          continue;

        if ((*this_mi)->sb_type == BLOCK_32X32) {
814 815
          int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
                                     cpi->sf.short_circuit_low_temp_var == 3)
816
                                        ? ((5 * thresholds[1]) >> 3)
817 818
                                        : (thresholds[1] >> 1);
          if (vt->split[i].part_variances.none.variance < threshold_32x32)
819
            x->variance_low[i + 5] = 1;
820
        } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837
          // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
          // inside.
          if ((*this_mi)->sb_type == BLOCK_16X16 ||
              (*this_mi)->sb_type == BLOCK_32X16 ||
              (*this_mi)->sb_type == BLOCK_16X32) {
            for (j = 0; j < 4; j++) {
              if (vt->split[i].split[j].part_variances.none.variance <
                  (thresholds[2] >> 8))
                x->variance_low[(i << 2) + j + 9] = 1;
            }
          }
        }
      }
    }
  }
}

838 839
static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
                                     MACROBLOCKD *xd, BLOCK_SIZE bsize,
840
                                     int mi_row, int mi_col) {
841 842 843 844 845