/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

Dmitry Kovalev's avatar
Dmitry Kovalev committed
11 12 13 14
#include <limits.h>
#include <math.h>
#include <stdio.h>

Jim Bankoski's avatar
Jim Bankoski committed
15
#include "./vp9_rtcd.h"
Johann's avatar
Johann committed
16
#include "./vpx_dsp_rtcd.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
17 18
#include "./vpx_config.h"

19
#include "vpx_dsp/vpx_dsp_common.h"
20
#include "vpx_ports/mem.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
21
#include "vpx_ports/vpx_timer.h"
22
#include "vpx_ports/system_state.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
23

24
#include "vp9/common/vp9_common.h"
Yaowu Xu's avatar
Yaowu Xu committed
25
#include "vp9/common/vp9_entropy.h"
26
#include "vp9/common/vp9_entropymode.h"
27
#include "vp9/common/vp9_idct.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
28 29 30
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
31
#include "vp9/common/vp9_reconintra.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
32
#include "vp9/common/vp9_reconinter.h"
33
#include "vp9/common/vp9_seg_common.h"
34
#include "vp9/common/vp9_tile_common.h"
35

36
#include "vp9/encoder/vp9_aq_360.h"
37
#include "vp9/encoder/vp9_aq_complexity.h"
Marco Paniconi's avatar
Marco Paniconi committed
38 39
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
40 41 42
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
43
#include "vp9/encoder/vp9_ethread.h"
44
#include "vp9/encoder/vp9_extend.h"
45
#include "vp9/encoder/vp9_pickmode.h"
46
#include "vp9/encoder/vp9_rd.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
47 48
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
49
#include "vp9/encoder/vp9_tokenize.h"
50

clang-format's avatar
clang-format committed
51 52 53
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
54

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
// Machine learning-based early termination parameters.
// 24-entry table of mean values; presumably the per-feature means of the
// training set (inferred from the name -- the consumer is not visible here).
// The values fall into three consecutive groups of eight, each ending in 0.0.
static const double train_mean[24] = {
  // Entries 0-7.
  303501.697372, 3042630.372158,
  24.694696,     1.392182,
  689.413511,    162.027012,
  1.478213,      0.0,
  // Entries 8-15.
  135382.260230, 912738.513263,
  28.845217,     1.515230,
  544.158492,    131.807995,
  1.436863,      0.0,
  // Entries 16-23.
  43682.377587,  208131.711766,
  28.084737,     1.356677,
  138.254122,    119.522553,
  1.252322,      0.0
};

// 24-entry companion table to train_mean; presumably per-feature spread
// values of the training set (inferred from the name -- the consumer is not
// visible here).  Same three-groups-of-eight layout, each group ending in 0.0.
static const double train_stdm[24] = {
  // Entries 0-7.
  673689.212982, 5996652.516628,
  0.024449,      1.989792,
  985.880847,    0.014638,
  2.001898,      0.0,
  // Entries 8-15.
  208798.775332, 1812548.443284,
  0.018693,      1.838009,
  396.986910,    0.015657,
  1.332541,      0.0,
  // Entries 16-23.
  55888.847031,  448587.962714,
  0.017900,      1.904776,
  98.652832,     0.016598,
  1.320992,      0.0
};

// Error tolerance: 0.01%-0.05%-0.1%
// 24 classifier coefficients, listed in index order (four per row).
static const double classifiers[24] = {
  0.111736, 0.289977, 0.042219, 0.204765,   // 0-3
  0.120410, -0.143863, 0.282376, 0.847811,  // 4-7
  0.637161, 0.131570, 0.018636, 0.202134,   // 8-11
  0.112797, 0.028162, 0.182450, 1.124367,   // 12-15
  0.386133, 0.083700, 0.050028, 0.150873,   // 16-19
  0.061119, 0.109318, 0.127255, 0.625211    // 20-23
};

82
// This is used as a reference when computing the source variance for the
// purpose of activity masking: 64 samples, all equal to 128.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

94 95
#if CONFIG_VP9_HIGHBITDEPTH
// 16-bit-sample counterpart of VP9_VAR_OFFS for bit depth 8: 64 samples, all
// equal to 128.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

// Reference samples for bit depth 10: 64 samples, all equal to 128 * 4 (512).
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

// Reference samples for bit depth 12: 64 samples, all equal to 128 * 16
// (2048).
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

128 129
// Returns the variance of the block in |ref| measured against the flat
// VP9_VAR_OFFS reference, using the variance function registered for block
// size |bs|.  The reference is passed with stride 0, so the same 64 entries
// serve every row.  The SSE produced by the variance function is discarded.
unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                  BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
  return var;
}

136
#if CONFIG_VP9_HIGHBITDEPTH
137 138
// High-bit-depth variant of vp9_get_sby_variance().  Selects the flat
// reference table matching bit depth |bd| (10 or 12; any other value,
// including 8, uses the 8-bit-valued table) and returns the variance of |ref|
// against it.  The reference is passed with stride 0 and the SSE is discarded.
unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                       BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  const uint16_t *offs;
  switch (bd) {
    case 10: offs = VP9_HIGH_VAR_OFFS_10; break;
    case 12: offs = VP9_HIGH_VAR_OFFS_12; break;
    case 8:
    default: offs = VP9_HIGH_VAR_OFFS_8; break;
  }
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, CONVERT_TO_BYTEPTR(offs), 0,
                           &sse);
  return var;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Per-pixel form of vp9_get_sby_variance(): the block variance is divided,
// with rounding, by 2^num_pels_log2_lookup[bs].
unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  const unsigned int block_var = vp9_get_sby_variance(cpi, ref, bs);
  const int pels_log2 = num_pels_log2_lookup[bs];
  return ROUND_POWER_OF_TWO(block_var, pels_log2);
}

#if CONFIG_VP9_HIGHBITDEPTH
// Per-pixel form of vp9_high_get_sby_variance(): the block variance is
// widened to 64 bits, divided with rounding by 2^num_pels_log2_lookup[bs],
// and narrowed back to unsigned int.
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  return (unsigned int)ROUND64_POWER_OF_TWO(
      (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd),
      num_pels_log2_lookup[bs]);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

179
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
180 181
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
182
                                                   BLOCK_SIZE bs) {
183 184
  unsigned int sse, var;
  uint8_t *last_y;
185
  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
186 187 188 189 190

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
191 192 193
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

194
// Picks a fixed partition size for the superblock at (mi_row, mi_col) from
// the per-pixel variance of the 64x64 source luma block against LAST_FRAME:
//   var < 8     -> BLOCK_64X64
//   var < 128   -> BLOCK_32X32
//   var < 2048  -> BLOCK_16X16
//   otherwise   -> BLOCK_8X8
static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}

208 209
// Lighter version of set_offsets that only sets the mode info
// pointers.
Jingning Han's avatar
Jingning Han committed
210
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
211
                                         MACROBLOCK *const x,
clang-format's avatar
clang-format committed
212
                                         MACROBLOCKD *const xd, int mi_row,
Jingning Han's avatar
Jingning Han committed
213
                                         int mi_col) {
214
  const int idx_str = xd->mi_stride * mi_row + mi_col;
215 216
  xd->mi = cm->mi_grid_visible + idx_str;
  xd->mi[0] = cm->mi + idx_str;
217
  x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
218 219 220
}

static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
221 222
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
223 224
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
Scott LaVarnway's avatar
Scott LaVarnway committed
225
  MODE_INFO *mi;
226 227 228
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const struct segmentation *const seg = &cm->seg;
Alex Converse's avatar
Alex Converse committed
229
  MvLimits *const mv_limits = &x->mv_limits;
230

231
  set_skip_context(xd, mi_row, mi_col);
232

233 234
  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);

Scott LaVarnway's avatar
Scott LaVarnway committed
235
  mi = xd->mi[0];
236 237

  // Set up destination pointers.
238
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
239 240 241

  // Set up limit values for MV components.
  // Mv beyond the range do not produce new/different prediction block.
Alex Converse's avatar
Alex Converse committed
242 243 244 245
  mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
246 247 248

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
clang-format's avatar
clang-format committed
249 250
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);
251 252 253 254 255

  // Set up source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
256 257
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;
258 259 260

  // Setup segment ID.
  if (seg->enabled) {
261
    if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ &&
262
        cpi->oxcf.aq_mode != EQUATOR360_AQ) {
clang-format's avatar
clang-format committed
263 264
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
Scott LaVarnway's avatar
Scott LaVarnway committed
265
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
266 267 268
    }
    vp9_init_plane_quantizers(cpi, x);

Scott LaVarnway's avatar
Scott LaVarnway committed
269
    x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id];
270
  } else {
Scott LaVarnway's avatar
Scott LaVarnway committed
271
    mi->segment_id = 0;
272 273
    x->encode_breakout = cpi->encode_breakout;
  }
Scott LaVarnway's avatar
Scott LaVarnway committed
274 275 276

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
277 278
}

279 280 281
// Makes every mode-info grid entry covered by the |bsize| block at
// (mi_row, mi_col) point at the block's first MODE_INFO (xd->mi[0]).  The
// covered area is clipped to the frame's mi_cols / mi_rows.
static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int block_width =
      VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  const int block_height =
      VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  const int mi_stride = xd->mi_stride;
  MODE_INFO *const src_mi = xd->mi[0];
  int i, j;

  for (j = 0; j < block_height; ++j)
    for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
}

clang-format's avatar
clang-format committed
294 295
static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
296 297
                           BLOCK_SIZE bsize) {
  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
298
    set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
Scott LaVarnway's avatar
Scott LaVarnway committed
299
    xd->mi[0]->sb_type = bsize;
300 301 302 303
  }
}

// Accumulated statistics for one node of the variance tree.
typedef struct {
  // This struct is used for computing variance in choose_partitioning(), where
  // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even
  // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 * 16
  // * 16 = 2^32).
  uint32_t sum_square_error;
  int32_t sum_error;
  int log2_count;  // log2 of the number of samples accumulated.
  int variance;    // Filled in by get_variance(), not by fill_variance().
} var;

// Statistics for the three ways of coding a square block without a four-way
// split: whole (none), two horizontal halves, two vertical halves.
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

// Leaf level of the tree: the four children are plain |var| entries.
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

// Each level below nests four nodes of the next smaller level.
typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

// Size-independent view of one tree node, as produced by tree_to_node():
// a pointer to the node's own partition statistics plus pointers to the
// |var| summarising each of its four children.
typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;

// Fills |node| with a size-independent view of the variance-tree node |data|,
// which must be the v64x64 / v32x32 / v16x16 / v8x8 / v4x4 struct matching
// |bsize|.  node->split[i] points at child i's "none" statistics, or directly
// at the child |var| for BLOCK_4X4.  Any other |bsize| trips assert(0) and
// leaves node->part_variances NULL with node->split untouched.
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int i;
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_4X4: {
      v4x4 *vt = (v4x4 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
      break;
    }
    default: {
      assert(0);
      break;
    }
  }
}

// Set variance values given sum square error, sum error, count.
402
static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
403 404
  v->sum_square_error = s2;
  v->sum_error = s;
405
  v->log2_count = c;
406 407 408
}

// Computes v->variance from the accumulated sums:
//   256 * (sum_square_error - sum_error^2 / 2^log2_count) / 2^log2_count
// with the divisions done as right shifts and the outer arithmetic in 32-bit
// unsigned.
static void get_variance(var *v) {
  // Square the sum in 64 bits: sum_error is a 32-bit signed value and its
  // square can exceed INT_MAX, which would be signed-overflow undefined
  // behaviour if evaluated in int.
  const uint32_t mean_sq_term =
      (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> v->log2_count);
  v->variance =
      (int)(256 * (v->sum_square_error - mean_sq_term) >> v->log2_count);
}

415
// Combines two sibling statistics with the same sample count into |r|: the
// sums are added and the log2 sample count is incremented by one.
// r->variance is not computed here.
static void sum_2_variances(const var *a, const var *b, var *r) {
  assert(a->log2_count == b->log2_count);
  fill_variance(a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->log2_count + 1, r);
}

// Derives the horz / vert / none statistics of the tree node |data| (of the
// type matching |bsize|) from its four children:
//   horz[0] = split 0 + 1,  horz[1] = split 2 + 3,
//   vert[0] = split 0 + 2,  vert[1] = split 1 + 3,
//   none    = vert[0] + vert[1].
// Only the sums and counts are filled in; variances are not computed here.
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
                  &node.part_variances->none);
}

clang-format's avatar
clang-format committed
433 434 435 436
// Tries to settle the partitioning of the |bsize| block at (mi_row, mi_col)
// from the variance-tree node |data|.
// Returns 1 when a block size was chosen and written with set_block_size()
// (the whole block, or two vertical / horizontal halves); returns 0 when
// nothing was chosen, including when |force_split| is 1 or when |bsize| is
// below |bsize_min|.
// |threshold| is the variance limit below which a candidate is accepted.
static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  if (force_split == 1) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (cm->frame_type == KEY_FRAME &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }

    // Check vertical split.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      // Both halves must be below threshold and the sub-size must map to a
      // valid block size for plane 1.
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }

    return 0;
  }
  return 0;
}

510 511 512
static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int content_state) {
513 514 515 516
  if (speed >= 8) {
    if (width <= 640 && height <= 480)
      return (5 * threshold_base) >> 2;
    else if ((content_state == kLowSadLowSumdiff) ||
517 518
             (content_state == kHighSadLowSumdiff) ||
             (content_state == kLowVarHighSumdiff))
519 520 521
      return (5 * threshold_base) >> 2;
  } else if (speed == 7) {
    if ((content_state == kLowSadLowSumdiff) ||
522 523
        (content_state == kHighSadLowSumdiff) ||
        (content_state == kLowVarHighSumdiff)) {
524 525 526 527 528 529
      return (5 * threshold_base) >> 2;
    }
  }
  return threshold_base;
}

Marco's avatar
Marco committed
530 531 532 533
// Set the variance split thresholds for following the block sizes:
// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frame.
534 535
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
Marco's avatar
Marco committed
536 537 538
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);
  const int threshold_multiplier = is_key_frame ? 20 : 1;
clang-format's avatar
clang-format committed
539 540
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
541

Marco's avatar
Marco committed
542 543 544 545 546 547
  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base >> 2;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base << 2;
  } else {
548 549
    // Increase base variance threshold based on estimated noise level.
    if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
clang-format's avatar
clang-format committed
550 551
      NOISE_LEVEL noise_level =
          vp9_noise_estimate_extract_level(&cpi->noise_estimate);
552
      if (noise_level == kHigh)
553
        threshold_base = 3 * threshold_base;
554 555
      else if (noise_level == kMedium)
        threshold_base = threshold_base << 1;
556
      else if (noise_level < kLow)
557
        threshold_base = (7 * threshold_base) >> 3;
558
    }
559
#if CONFIG_VP9_TEMPORAL_DENOISING
560 561
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
        cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
562 563 564
      threshold_base =
          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
565
    else
566
      threshold_base =
567 568
          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
                                    cm->height, content_state);
569
#else
570 571 572
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
573
#endif
574 575
    thresholds[0] = threshold_base;
    thresholds[2] = threshold_base << cpi->oxcf.speed;
576 577
    if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
      thresholds[2] = thresholds[2] << 1;
Marco's avatar
Marco committed
578
    if (cm->width <= 352 && cm->height <= 288) {
579 580
      thresholds[0] = threshold_base >> 3;
      thresholds[1] = threshold_base >> 1;
581
      thresholds[2] = threshold_base << 3;
582
    } else if (cm->width < 1280 && cm->height < 720) {
Marco's avatar
Marco committed
583
      thresholds[1] = (5 * threshold_base) >> 2;
584 585 586 587
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[1] = threshold_base << 1;
    } else {
      thresholds[1] = (5 * threshold_base) >> 1;
Marco's avatar
Marco committed
588 589 590 591
    }
  }
}

592 593
// Updates the variance-based-partitioning thresholds stored in |cpi| for
// quantizer index |q|.  Does nothing unless the partition search type is
// VAR_BASED_PARTITION or REFERENCE_PARTITION.
// Sets cpi->vbp_thresholds (via set_vbp_thresholds), vbp_threshold_sad,
// vbp_threshold_copy, vbp_bsize_min and vbp_threshold_minmax.
void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
                                           int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);

  if (sf->partition_search_type != VAR_BASED_PARTITION &&
      sf->partition_search_type != REFERENCE_PARTITION) {
    return;
  }

  set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
  // The thresholds below are not changed locally.
  if (is_key_frame) {
    cpi->vbp_threshold_sad = 0;
    cpi->vbp_threshold_copy = 0;
    cpi->vbp_bsize_min = BLOCK_8X8;
  } else {
    // SAD threshold: fixed at 10 for frames up to 352x288, otherwise twice
    // the dequant value with a floor of 1000.
    if (cm->width <= 352 && cm->height <= 288)
      cpi->vbp_threshold_sad = 10;
    else
      cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
                                   ? (cpi->y_dequant[q][1] << 1)
                                   : 1000;
    cpi->vbp_bsize_min = BLOCK_16X16;
    // Copy threshold: fixed for the two smallest resolution classes,
    // otherwise eight times the dequant value with a floor of 8000.
    if (cm->width <= 352 && cm->height <= 288)
      cpi->vbp_threshold_copy = 4000;
    else if (cm->width <= 640 && cm->height <= 360)
      cpi->vbp_threshold_copy = 8000;
    else
      cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
                                    ? (cpi->y_dequant[q][1] << 3)
                                    : 8000;
  }
  cpi->vbp_threshold_minmax = 15 + (q >> 3);
}

628 629 630 631 632 633
// Compute the minmax over the 8x8 subblocks.
// For each of the four 8x8 sub-blocks of the 16x16 block at
// (x16_idx, y16_idx) whose top-left corner lies inside pixels_wide x
// pixels_high, the (max - min) value reported by the minmax kernel for
// source |s| / reference |d| is taken.  Returns the largest such range minus
// the smallest.
// NOTE(review): if no sub-block is inside the frame the accumulators keep
// their initial values (0 and 255) and the result is -255.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int k;
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    int min = 0;
    int max = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                              d + y8_idx * dp + x8_idx, dp, &min, &max);
      } else {
        vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
                       dp, &min, &max);
      }
#else
      vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                     &min, &max);
#endif
      if ((max - min) > minmax_max) minmax_max = (max - min);
      if ((max - min) < minmax_min) minmax_min = (max - min);
    }
  }
  return (minmax_max - minmax_min);
}

664 665 666 667 668
// Fills the four leaf entries of |vst| for the 8x8 block at (x8_idx, y8_idx)
// using 4x4 averages.  For each 4x4 sub-block whose top-left corner lies
// inside pixels_wide x pixels_high, the stored "sum" is the source average
// minus the reference average (the reference average is fixed at 128 on key
// frames) and the stored "sse" is its square.  Sub-blocks outside the frame
// get sum = sse = 0.  Each entry is stored with a log2 sample count of 0.
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x4_idx = x8_idx + ((k & 1) << 2);
    int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      } else {
        s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      }
#else
      s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

// Fills the four 8x8 leaves of the 16x16 variance node |vst|.
//
// For each 8x8 sub-block (visited in raster order) that starts inside the
// pixels_wide x pixels_high area, the average of the source block |s| is
// compared with the average of the co-located block in |d|; the difference
// and its square are handed to fill_variance() as sum and sse. Sub-blocks
// that start outside that area are filled with sum = 0 and sse = 0.
//
// s, sp:             source buffer and its stride.
// d, dp:             reference buffer and its stride; not read on key frames.
// x16_idx, y16_idx:  pixel offset of the 16x16 block inside s and d.
// highbd_flag:       (high bitdepth builds only) the highbd averaging
//                    routines are used when YV12_FLAG_HIGHBITDEPTH is set.
// is_key_frame:      when nonzero the reference average is fixed at 128.
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int blk;
  for (blk = 0; blk < 4; ++blk) {
    // Bit 0 of blk selects the column, bit 1 selects the row.
    const int col = x16_idx + ((blk & 1) << 3);
    const int row = y16_idx + ((blk >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (col < pixels_wide && row < pixels_high) {
      const uint8_t *const src = s + row * sp + col;
      int src_avg;
      int ref_avg = 128;  // Value used when there is no reference (key frame).
#if CONFIG_VP9_HIGHBITDEPTH
      const int is_highbd = (highbd_flag & YV12_FLAG_HIGHBITDEPTH) != 0;
      if (is_highbd) {
        src_avg = vpx_highbd_avg_8x8(src, sp);
      } else {
        src_avg = vpx_avg_8x8(src, sp);
      }
      if (!is_key_frame) {
        // The reference pointer is only formed when it is actually read.
        const uint8_t *const ref = d + row * dp + col;
        if (is_highbd) {
          ref_avg = vpx_highbd_avg_8x8(ref, dp);
        } else {
          ref_avg = vpx_avg_8x8(ref, dp);
        }
      }
#else
      src_avg = vpx_avg_8x8(src, sp);
      if (!is_key_frame) {
        ref_avg = vpx_avg_8x8(d + row * dp + col, dp);
      }
#endif
      sum = src_avg - ref_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[blk].part_variances.none);
  }
}

JackyChen's avatar
JackyChen committed
736 737 738 739
// Check if most of the superblock is skin content, and if so, force split to
// 32x32.
//
// The decision is taken purely from cpi->skin_map: the 16x16 sub-blocks of
// the superblock at (mi_row, mi_col) are counted as skin / non-skin, and when
// more than 12 of them are skin, *force_split is set to 1 and 1 is returned.
// In every other case *force_split is left untouched and 0 is returned.
//
// The function always returns 0 when:
//  - the build has CONFIG_VP9_HIGHBITDEPTH and cm->use_highbitdepth is set,
//  - low_res is nonzero, or
//  - the superblock is on or next to the frame boundary.
//
// |x| is not used; it is kept so that the signature (and every caller) stays
// unchanged. This function does not write any skin flag into |x| itself.
static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
                         int mi_row, int mi_col, int *force_split) {
  VP9_COMMON *const cm = &cpi->common;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) return 0;
#endif
  (void)x;
  // Avoid checking superblocks on/near boundary and avoid low resolutions.
  // Note superblock may still pick 64X64 if y_sad is very small
  // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
  if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
                   mi_row + 8 < cm->mi_rows)) {
    int num_16x16_skin = 0;
    int num_16x16_nonskin = 0;
    const int block_index = mi_row * cm->mi_cols + mi_col;
    const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
    const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
    const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
    const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
    // Loop through the 16x16 sub-blocks (two mode-info units per step).
    int i, j;
    for (i = 0; i < ymis; i += 2) {
      for (j = 0; j < xmis; j += 2) {
        const int bl_index = block_index + i * cm->mi_cols + j;
        const int is_skin = cpi->skin_map[bl_index];
        num_16x16_skin += is_skin;
        num_16x16_nonskin += (1 - is_skin);
        if (num_16x16_nonskin > 3) {
          // Exit both loops if at least 4 of the 16x16 blocks are not skin.
          i = ymis;
          break;
        }
      }
    }
    if (num_16x16_skin > 12) {
      *force_split = 1;
      return 1;
    }
  }
  return 0;
}

clang-format's avatar
clang-format committed
790
static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
791
                                  v64x64 *vt, int64_t thresholds[],
792 793 794
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  int i, j;
clang-format's avatar
clang-format committed
795
  VP9_COMMON *const cm = &cpi->common;
796 797 798 799 800 801 802 803 804 805 806
  const int mv_thr = cm->width > 640 ? 8 : 4;
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
  // int_pro mv is small. If the temporal variance is small set the flag
  // variance_low for the block. The variance threshold can be adjusted, the
  // higher the more aggressive.
  if (ref_frame_partition == LAST_FRAME &&
      (cpi->sf.short_circuit_low_temp_var == 1 ||
       (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
        xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
        xd->mi[0]->mv[0].as_mv.row < mv_thr &&
        xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
807 808 809
    if (xd->mi[0]->sb_type == BLOCK_64X64) {
      if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
810 811 812 813 814 815 816 817 818 819 820 821
    } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 1] = 1;
      }
    } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 3] = 1;
      }
    } else {
      for (i = 0; i < 4; i++) {
822 823 824 825 826 827 828 829 830 831
        const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
        const int idx_str =
            cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
        MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;

        if (cm->mi_cols <= mi_col + idx[i][1] ||
            cm->mi_rows <= mi_row + idx[i][0])
          continue;

        if ((*this_mi)->sb_type == BLOCK_32X32) {
832 833
          int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
                                     cpi->sf.short_circuit_low_temp_var == 3)
Jerome Jiang's avatar