vp9_firstpass.c 127 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include <limits.h>
12
#include <math.h>
13
#include <stdio.h>
14

15
#include "./vpx_dsp_rtcd.h"
16 17
#include "./vpx_scale_rtcd.h"

18
#include "vpx_dsp/vpx_dsp_common.h"
19
#include "vpx_mem/vpx_mem.h"
20
#include "vpx_ports/mem.h"
21
#include "vpx_ports/system_state.h"
22 23 24 25 26
#include "vpx_scale/vpx_scale.h"
#include "vpx_scale/yv12config.h"

#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_quant_common.h"
27
#include "vp9/common/vp9_reconinter.h"  // vp9_setup_dst_planes()
28
#include "vp9/encoder/vp9_aq_variance.h"
29
#include "vp9/encoder/vp9_block.h"
30 31
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_ethread.h"
35 36 37
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mcomp.h"
38
#include "vp9/encoder/vp9_quantize.h"
39
#include "vp9/encoder/vp9_rd.h"
40
#include "vpx_dsp/variance.h"
John Koleszar's avatar
John Koleszar committed
41

42 43
#define OUTPUT_FPF 0
#define ARF_STATS_OUTPUT 0
44 45 46 47 48
#define COMPLEXITY_STATS_OUTPUT 0

#ifdef CORPUS_VBR_EXPERIMENT
#define CORPUS_VBR_MIDPOINT 82.0
#endif
49 50 51 52 53 54

#define FIRST_PASS_Q 10.0
#define GF_MAX_BOOST 96.0
#define INTRA_MODE_PENALTY 1024
#define MIN_ARF_GF_BOOST 240
#define MIN_DECAY_FACTOR 0.01
55
#define NEW_MV_MODE_PENALTY 32
56 57 58 59 60 61
#define DARK_THRESH 64
#define DEFAULT_GRP_WEIGHT 1.0
#define RC_FACTOR_MIN 0.75
#define RC_FACTOR_MAX 1.75
#define SECTION_NOISE_DEF 250.0
#define LOW_I_THRESH 24000
62 63 64

#define NCOUNT_INTRA_THRESH 8192
#define NCOUNT_INTRA_FACTOR 3
65

66
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001)
John Koleszar's avatar
John Koleszar committed
67

68 69 70
#if ARF_STATS_OUTPUT
unsigned int arf_count = 0;
#endif
71

72 73
// Resets the first pass file to the given position using a relative seek from
// the current position.
74
static void reset_fpf_position(TWO_PASS *p, const FIRSTPASS_STATS *position) {
75
  p->stats_in = position;
John Koleszar's avatar
John Koleszar committed
76 77
}

Adrian Grange's avatar
Adrian Grange committed
78
// Read frame stats at an offset from the current position.
79 80 81 82
static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) {
  if ((offset >= 0 && p->stats_in + offset >= p->stats_in_end) ||
      (offset < 0 && p->stats_in + offset < p->stats_in_start)) {
    return NULL;
John Koleszar's avatar
John Koleszar committed
83 84
  }

85
  return &p->stats_in[offset];
86 87
}

Paul Wilkins's avatar
Paul Wilkins committed
88
// Copies the stats entry at the current read position into *fps and moves
// the read position forward by one entry.
// Returns 1 on success, or EOF (leaving *fps untouched) once the read
// position has reached p->stats_in_end.
static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
  const int exhausted = p->stats_in >= p->stats_in_end;

  if (!exhausted) {
    *fps = *p->stats_in++;
    return 1;
  }
  return EOF;
}

96 97
// Queues `stats` on `pktlist` as a VPX_CODEC_STATS_PKT packet. The packet
// carries the caller's pointer (not a copy made here) together with a size
// of sizeof(FIRSTPASS_STATS).
//
// When OUTPUT_FPF is enabled the same stats are additionally appended as one
// text line to "firstpass.stt" in the current directory (debug aid).
static void output_stats(FIRSTPASS_STATS *stats,
                         struct vpx_codec_pkt_list *pktlist) {
  struct vpx_codec_cx_pkt pkt;
  pkt.kind = VPX_CODEC_STATS_PKT;
  pkt.data.twopass_stats.buf = stats;
  pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
  vpx_codec_pkt_list_add(pktlist, &pkt);

// TEMP debug code
#if OUTPUT_FPF
  {
    FILE *fpfile = fopen("firstpass.stt", "a");

    // The dump is best effort: if the file cannot be opened, skip it rather
    // than passing a NULL stream to fprintf()/fclose().
    if (fpfile != NULL) {
      fprintf(fpfile,
              "%12.0lf %12.4lf %12.2lf %12.2lf %12.2lf %12.0lf %12.4lf %12.4lf"
              "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
              "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.0lf %12.0lf %12.0lf"
              "%12.4lf"
              "\n",
              stats->frame, stats->weight, stats->intra_error,
              stats->coded_error, stats->sr_coded_error,
              stats->frame_noise_energy, stats->pcnt_inter, stats->pcnt_motion,
              stats->pcnt_second_ref, stats->pcnt_neutral,
              stats->pcnt_intra_low, stats->pcnt_intra_high,
              stats->intra_skip_pct, stats->intra_smooth_pct,
              stats->inactive_zone_rows, stats->inactive_zone_cols, stats->MVr,
              stats->mvr_abs, stats->MVc, stats->mvc_abs, stats->MVrv,
              stats->MVcv, stats->mv_in_out_count, stats->count,
              stats->duration);
      fclose(fpfile);
    }
  }
#endif
}

129 130
#if CONFIG_FP_MB_STATS
// Queues the per-macroblock first pass stats buffer on `pktlist` as a
// VPX_CODEC_FPMB_STATS_PKT packet whose size is
// cm->initial_mbs * sizeof(uint8_t).
static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm,
                              struct vpx_codec_pkt_list *pktlist) {
  struct vpx_codec_cx_pkt fpmb_pkt;

  fpmb_pkt.kind = VPX_CODEC_FPMB_STATS_PKT;
  fpmb_pkt.data.firstpass_mb_stats.sz = cm->initial_mbs * sizeof(uint8_t);
  fpmb_pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;

  vpx_codec_pkt_list_add(pktlist, &fpmb_pkt);
}
#endif

John Koleszar's avatar
John Koleszar committed
140
// Resets a stats record to its neutral state: every accumulator becomes 0,
// except `duration`, which starts at 1.0.
static void zero_stats(FIRSTPASS_STATS *section) {
  FIRSTPASS_STATS *const s = section;

  // Frame identity and weighting.
  s->frame = s->weight = 0.0;
  s->count = 0.0;
  s->duration = 1.0;
  s->spatial_layer_id = 0;

  // Error and noise accumulators.
  s->intra_error = s->coded_error = s->sr_coded_error = 0.0;
  s->frame_noise_energy = 0.0;

  // Block classification fractions.
  s->pcnt_inter = s->pcnt_motion = 0.0;
  s->pcnt_second_ref = s->pcnt_neutral = 0.0;
  s->pcnt_intra_low = s->pcnt_intra_high = 0.0;
  s->intra_skip_pct = s->intra_smooth_pct = 0.0;

  // Inactive image zones.
  s->inactive_zone_rows = s->inactive_zone_cols = 0.0;

  // Motion vector statistics.
  s->MVr = s->mvr_abs = 0.0;
  s->MVc = s->mvc_abs = 0.0;
  s->MVrv = s->MVcv = 0.0;
  s->mv_in_out_count = 0.0;
}

169 170
// Adds every numeric field of `frame` onto the matching field of `section`.
// spatial_layer_id is not summed: it is overwritten with the value from
// `frame`.
static void accumulate_stats(FIRSTPASS_STATS *section,
                             const FIRSTPASS_STATS *frame) {
  FIRSTPASS_STATS *const dst = section;
  const FIRSTPASS_STATS *const src = frame;

  dst->spatial_layer_id = src->spatial_layer_id;

  // Frame identity and weighting.
  dst->frame += src->frame;
  dst->weight += src->weight;
  dst->count += src->count;
  dst->duration += src->duration;

  // Error and noise accumulators.
  dst->intra_error += src->intra_error;
  dst->coded_error += src->coded_error;
  dst->sr_coded_error += src->sr_coded_error;
  dst->frame_noise_energy += src->frame_noise_energy;

  // Block classification fractions.
  dst->pcnt_inter += src->pcnt_inter;
  dst->pcnt_motion += src->pcnt_motion;
  dst->pcnt_second_ref += src->pcnt_second_ref;
  dst->pcnt_neutral += src->pcnt_neutral;
  dst->pcnt_intra_low += src->pcnt_intra_low;
  dst->pcnt_intra_high += src->pcnt_intra_high;
  dst->intra_skip_pct += src->intra_skip_pct;
  dst->intra_smooth_pct += src->intra_smooth_pct;

  // Inactive image zones.
  dst->inactive_zone_rows += src->inactive_zone_rows;
  dst->inactive_zone_cols += src->inactive_zone_cols;

  // Motion vector statistics.
  dst->MVr += src->MVr;
  dst->mvr_abs += src->mvr_abs;
  dst->MVc += src->MVc;
  dst->mvc_abs += src->mvc_abs;
  dst->MVrv += src->MVrv;
  dst->MVcv += src->MVcv;
  dst->mv_in_out_count += src->mv_in_out_count;
}

199 200
// Subtracts every numeric field of `frame` from the matching field of
// `section`. spatial_layer_id is left untouched.
static void subtract_stats(FIRSTPASS_STATS *section,
                           const FIRSTPASS_STATS *frame) {
  FIRSTPASS_STATS *const dst = section;
  const FIRSTPASS_STATS *const src = frame;

  // Frame identity and weighting.
  dst->frame -= src->frame;
  dst->weight -= src->weight;
  dst->count -= src->count;
  dst->duration -= src->duration;

  // Error and noise accumulators.
  dst->intra_error -= src->intra_error;
  dst->coded_error -= src->coded_error;
  dst->sr_coded_error -= src->sr_coded_error;
  dst->frame_noise_energy -= src->frame_noise_energy;

  // Block classification fractions.
  dst->pcnt_inter -= src->pcnt_inter;
  dst->pcnt_motion -= src->pcnt_motion;
  dst->pcnt_second_ref -= src->pcnt_second_ref;
  dst->pcnt_neutral -= src->pcnt_neutral;
  dst->pcnt_intra_low -= src->pcnt_intra_low;
  dst->pcnt_intra_high -= src->pcnt_intra_high;
  dst->intra_skip_pct -= src->intra_skip_pct;
  dst->intra_smooth_pct -= src->intra_smooth_pct;

  // Inactive image zones.
  dst->inactive_zone_rows -= src->inactive_zone_rows;
  dst->inactive_zone_cols -= src->inactive_zone_cols;

  // Motion vector statistics.
  dst->MVr -= src->MVr;
  dst->mvr_abs -= src->mvr_abs;
  dst->MVc -= src->MVc;
  dst->mvc_abs -= src->mvc_abs;
  dst->MVrv -= src->MVrv;
  dst->MVcv -= src->MVcv;
  dst->mv_in_out_count -= src->mv_in_out_count;
}

228 229 230 231 232
// Calculate an active area of the image that discounts formatting
// bars and partially discounts other 0 energy areas.
#define MIN_ACTIVE_AREA 0.5
#define MAX_ACTIVE_AREA 1.0
static double calculate_active_area(const VP9_COMP *cpi,
233
                                    const FIRSTPASS_STATS *this_frame) {
234 235
  double active_pct;

236 237 238 239
  active_pct =
      1.0 -
      ((this_frame->intra_skip_pct / 2) +
       ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows));
240 241
  return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA);
}
242

243 244 245 246
// Get the average weighted error for the clip (or corpus).
// With CORPUS_VBR_EXPERIMENT defined the result is the average frame weight
// scaled by CORPUS_VBR_MIDPOINT; otherwise it is the average coded error
// scaled by the average frame weight.
static double get_distribution_av_err(TWO_PASS *const twopass) {
  const FIRSTPASS_STATS *const totals = &twopass->total_stats;
  const double av_weight = totals->weight / totals->count;

#ifdef CORPUS_VBR_EXPERIMENT
  return av_weight * CORPUS_VBR_MIDPOINT;
#else
  return (totals->coded_error * av_weight) / totals->count;
#endif
}

255 256
// Calculate a modified Error used in distributing bits between easier and
// harder frames.
257
#define ACT_AREA_CORRECTION 0.5
paulwilkins's avatar
paulwilkins committed
258 259
static double calculate_mod_frame_score(const VP9_COMP *cpi,
                                        const VP9EncoderConfig *oxcf,
260 261
                                        const FIRSTPASS_STATS *this_frame,
                                        const double av_err) {
paulwilkins's avatar
paulwilkins committed
262
  double modified_score =
263 264 265
      av_err * pow(this_frame->coded_error * this_frame->weight /
                       DOUBLE_DIVIDE_CHECK(av_err),
                   oxcf->two_pass_vbrbias / 100.0);
266 267 268 269 270 271

  // Correction for active area. Frames with a reduced active area
  // (eg due to formatting bars) have a higher error per mb for the
  // remaining active MBs. The correction here assumes that coding
  // 0.5N blocks of complexity 2X is a little easier than coding N
  // blocks of complexity X.
paulwilkins's avatar
paulwilkins committed
272
  modified_score *=
273
      pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
274

paulwilkins's avatar
paulwilkins committed
275 276 277 278 279
  return modified_score;
}
static double calculate_norm_frame_score(const VP9_COMP *cpi,
                                         const TWO_PASS *twopass,
                                         const VP9EncoderConfig *oxcf,
280 281
                                         const FIRSTPASS_STATS *this_frame,
                                         const double av_err) {
paulwilkins's avatar
paulwilkins committed
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
  double modified_score =
      av_err * pow(this_frame->coded_error * this_frame->weight /
                       DOUBLE_DIVIDE_CHECK(av_err),
                   oxcf->two_pass_vbrbias / 100.0);

  const double min_score = (double)(oxcf->two_pass_vbrmin_section) / 100.0;
  const double max_score = (double)(oxcf->two_pass_vbrmax_section) / 100.0;

  // Correction for active area. Frames with a reduced active area
  // (eg due to formatting bars) have a higher error per mb for the
  // remaining active MBs. The correction here assumes that coding
  // 0.5N blocks of complexity 2X is a little easier than coding N
  // blocks of complexity X.
  modified_score *=
      pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);

  // Normalize to a midpoint score.
  modified_score /= DOUBLE_DIVIDE_CHECK(twopass->mean_mod_score);

  return fclamp(modified_score, min_score, max_score);
John Koleszar's avatar
John Koleszar committed
302 303
}

304
// This function returns the maximum target rate per frame.
305 306
static int frame_max_bits(const RATE_CONTROL *rc,
                          const VP9EncoderConfig *oxcf) {
307
  int64_t max_bits = ((int64_t)rc->avg_frame_bandwidth *
308 309
                      (int64_t)oxcf->two_pass_vbrmax_section) /
                     100;
Yaowu Xu's avatar
Yaowu Xu committed
310
  if (max_bits < 0)
311
    max_bits = 0;
312 313
  else if (max_bits > rc->max_frame_bandwidth)
    max_bits = rc->max_frame_bandwidth;
314

Yaowu Xu's avatar
Yaowu Xu committed
315
  return (int)max_bits;
John Koleszar's avatar
John Koleszar committed
316 317
}

318
// Prepares for a first pass by resetting the accumulated (total) first pass
// stats held in cpi->twopass.
void vp9_init_first_pass(VP9_COMP *cpi) {
  TWO_PASS *const twopass = &cpi->twopass;

  zero_stats(&twopass->total_stats);
}

322
// Finishes the first pass: emits the accumulated total stats as stats
// packets (one per spatial layer for two pass SVC, otherwise a single one)
// and releases the per-MB float stats buffer.
void vp9_end_first_pass(VP9_COMP *cpi) {
  if (!is_two_pass_svc(cpi)) {
    output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
  } else {
    int layer;
    for (layer = 0; layer < cpi->svc.number_spatial_layers; ++layer) {
      output_stats(&cpi->svc.layer_context[layer].twopass.total_stats,
                   cpi->output_pkt_list);
    }
  }

  vpx_free(cpi->twopass.fp_mb_float_stats);
  cpi->twopass.fp_mb_float_stats = NULL;
}
John Koleszar's avatar
John Koleszar committed
336

337
// Selects the MSE function matching `bsize`. Any size other than 8x8, 16x8
// or 8x16 gets the 16x16 function.
static vpx_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
  if (bsize == BLOCK_8X8) return vpx_mse8x8;
  if (bsize == BLOCK_16X8) return vpx_mse16x8;
  if (bsize == BLOCK_8X16) return vpx_mse8x16;
  return vpx_mse16x16;
}

346 347 348
// Runs the MSE function for `bsize` on the `src` and `ref` buffers and
// returns the sse value it writes back; the function's own return value is
// discarded.
static unsigned int get_prediction_error(BLOCK_SIZE bsize,
                                         const struct buf_2d *src,
                                         const struct buf_2d *ref) {
  unsigned int sse;

  get_block_variance_fn(bsize)(src->buf, src->stride, ref->buf, ref->stride,
                               &sse);
  return sse;
}

355
#if CONFIG_VP9_HIGHBITDEPTH
356
// High bit depth counterpart of get_block_variance_fn(): selects the MSE
// function for `bsize` at bit depth `bd`. A `bd` other than 10 or 12 selects
// the 8 bit variants; a `bsize` other than 8x8, 16x8 or 8x16 selects the
// 16x16 variant.
static vpx_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
                                                      int bd) {
  switch (bd) {
    case 10:
      switch (bsize) {
        case BLOCK_8X8: return vpx_highbd_10_mse8x8;
        case BLOCK_16X8: return vpx_highbd_10_mse16x8;
        case BLOCK_8X16: return vpx_highbd_10_mse8x16;
        default: return vpx_highbd_10_mse16x16;
      }
    case 12:
      switch (bsize) {
        case BLOCK_8X8: return vpx_highbd_12_mse8x8;
        case BLOCK_16X8: return vpx_highbd_12_mse16x8;
        case BLOCK_8X16: return vpx_highbd_12_mse8x16;
        default: return vpx_highbd_12_mse16x16;
      }
    default:
      switch (bsize) {
        case BLOCK_8X8: return vpx_highbd_8_mse8x8;
        case BLOCK_16X8: return vpx_highbd_8_mse16x8;
        case BLOCK_8X16: return vpx_highbd_8_mse8x16;
        default: return vpx_highbd_8_mse16x16;
      }
  }
}

// High bit depth counterpart of get_prediction_error(): runs the MSE function
// for `bsize` / `bd` on the two buffers and returns the sse it writes back.
static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
                                                const struct buf_2d *src,
                                                const struct buf_2d *ref,
                                                int bd) {
  unsigned int sse;

  highbd_get_block_variance_fn(bsize, bd)(src->buf, src->stride, ref->buf,
                                          ref->stride, &sse);
  return sse;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

397 398
// Refine the motion search range according to the frame dimension
// for first pass test.
399
static int get_search_range(const VP9_COMP *cpi) {
400
  int sr = 0;
401
  const int dim = VPXMIN(cpi->initial_width, cpi->initial_height);
402

403
  while ((dim << sr) < MAX_FULL_PEL_VAL) ++sr;
404 405 406
  return sr;
}

407
// Diamond motion search for the current block, started at the full-pel
// position of `ref_mv`. Each candidate is re-scored with
// vp9_get_mvpred_var() using an MSE based function and charged
// NEW_MV_MODE_PENALTY. Whenever a candidate scores below *best_motion_err,
// both *best_motion_err and *best_mv are updated; otherwise they are left
// unchanged.
static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                     const MV *ref_mv, MV *best_mv,
                                     int *best_motion_err) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int mv_penalty = NEW_MV_MODE_PENALTY;

  // Both the initial step and the number of follow-up steps are offset by
  // the frame-size dependent search range.
  const int range_shift = get_search_range(cpi);
  const int first_step = 3 + range_shift;
  const int extra_steps = (MAX_MVSEARCH_STEPS - 1) - 3 - range_shift;

  vp9_variance_fn_ptr_t fn_ptr = cpi->fn_ptr[bsize];
  MV start_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
  MV cand_mv = { 0, 0 };
  int pending_skips;
  int err;
  int step;

  // Override the default variance function to use MSE.
#if CONFIG_VP9_HIGHBITDEPTH
  fn_ptr.vf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
                  ? highbd_get_block_variance_fn(bsize, xd->bd)
                  : get_block_variance_fn(bsize);
#else
  fn_ptr.vf = get_block_variance_fn(bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Center the initial step/diamond search on best mv.
  err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &start_full, &cand_mv,
                                first_step, x->sadperbit16, &pending_skips,
                                &fn_ptr, ref_mv);
  if (err < INT_MAX) {
    err = vp9_get_mvpred_var(x, &cand_mv, ref_mv, &fn_ptr, 1);
  }
  // Only add the penalty when it cannot overflow.
  if (err < INT_MAX - mv_penalty) err += mv_penalty;

  if (err < *best_motion_err) {
    *best_motion_err = err;
    *best_mv = cand_mv;
  }

  // Carry out further step/diamond searches as necessary. The count reported
  // by the first search is used as the starting step index, and counts
  // reported by later searches are consumed one per iteration without
  // searching.
  step = pending_skips;
  pending_skips = 0;

  while (step < extra_steps) {
    ++step;

    if (pending_skips != 0) {
      --pending_skips;
      continue;
    }

    err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &start_full, &cand_mv,
                                  first_step + step, x->sadperbit16,
                                  &pending_skips, &fn_ptr, ref_mv);
    if (err < INT_MAX) {
      err = vp9_get_mvpred_var(x, &cand_mv, ref_mv, &fn_ptr, 1);
    }
    if (err < INT_MAX - mv_penalty) err += mv_penalty;

    if (err < *best_motion_err) {
      *best_motion_err = err;
      *best_mv = cand_mv;
    }
  }
}

471 472
static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) {
  if (2 * mb_col + 1 < cm->mi_cols) {
473
    return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_16X16 : BLOCK_16X8;
474
  } else {
475
    return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_8X16 : BLOCK_8X8;
476 477 478
  }
}

479
// Returns the lowest qindex whose q value (for `bit_depth`) is at least
// FIRST_PASS_Q, or QINDEX_RANGE - 1 when no index qualifies.
static int find_fp_qindex(vpx_bit_depth_t bit_depth) {
  int qindex = 0;

  while (qindex < QINDEX_RANGE &&
         !(vp9_convert_qindex_to_q(qindex, bit_depth) >= FIRST_PASS_Q)) {
    ++qindex;
  }

  return (qindex == QINDEX_RANGE) ? qindex - 1 : qindex;
}

// Chooses the frame type for the first pass: a key frame for the very first
// frame or when FRAMEFLAGS_KEY is set, unless the alt-ref buffer is being
// refreshed; an inter frame otherwise.
static void set_first_pass_params(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int key_wanted =
      cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY);

  cm->frame_type =
      (!cpi->refresh_alt_ref_frame && key_wanted) ? KEY_FRAME : INTER_FRAME;

  // Do not use periodic key frames.
  cpi->rc.frames_to_key = INT_MAX;
}

502 503 504 505 506 507
// Scale an sse threshold to account for 8/10/12 bit.
// 10 bit scales by << 4 and 12 bit by << 8; 8 bit input and builds without
// high bit depth support return `thresh` unchanged.
static int scale_sse_threshold(VP9_COMMON *cm, int thresh) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) {
    switch (cm->bit_depth) {
      case VPX_BITS_8: return thresh;
      case VPX_BITS_10: return thresh << 4;
      case VPX_BITS_12: return thresh << 8;
      default:
        assert(0 &&
               "cm->bit_depth should be VPX_BITS_8, "
               "VPX_BITS_10 or VPX_BITS_12");
        break;
    }
  }
#else
  (void)cm;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  return thresh;
}

523 524 525
// This threshold is used to track blocks where to all intents and purposes
// the intra prediction error is 0. Though the metric we test against
// is technically a sse we are mainly interested in blocks where all the pixels
// in the 8 bit domain have an error of <= 1 (where error = sse) so a
// linear scaling for 10 and 12 bit gives similar results.
528
#define UL_INTRA_THRESH 50
529 530
// Returns UL_INTRA_THRESH scaled for the stream bit depth: << 2 for 10 bit
// and << 4 for 12 bit. 8 bit input and builds without high bit depth support
// get the unscaled value.
static int get_ul_intra_threshold(VP9_COMMON *cm) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) {
    switch (cm->bit_depth) {
      case VPX_BITS_8: return UL_INTRA_THRESH;
      case VPX_BITS_10: return UL_INTRA_THRESH << 2;
      case VPX_BITS_12: return UL_INTRA_THRESH << 4;
      default:
        assert(0 &&
               "cm->bit_depth should be VPX_BITS_8, "
               "VPX_BITS_10 or VPX_BITS_12");
        break;
    }
  }
#else
  (void)cm;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  return UL_INTRA_THRESH;
}
548 549 550 551 552 553 554

#define SMOOTH_INTRA_THRESH 4000
// Returns SMOOTH_INTRA_THRESH scaled for the stream bit depth: << 4 for
// 10 bit and << 8 for 12 bit. 8 bit input and builds without high bit depth
// support get the unscaled value.
static int get_smooth_intra_threshold(VP9_COMMON *cm) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) {
    switch (cm->bit_depth) {
      case VPX_BITS_8: return SMOOTH_INTRA_THRESH;
      case VPX_BITS_10: return SMOOTH_INTRA_THRESH << 4;
      case VPX_BITS_12: return SMOOTH_INTRA_THRESH << 8;
      default:
        assert(0 &&
               "cm->bit_depth should be VPX_BITS_8, "
               "VPX_BITS_10 or VPX_BITS_12");
        break;
    }
  }
#else
  (void)cm;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  return SMOOTH_INTRA_THRESH;
}
569

570 571 572 573 574
#define FP_DN_THRESH 8
#define FP_MAX_DN_THRESH 16
#define KERNEL_SIZE 3

// Baseline kernel weights for first pass noise metric
575 576
// Stored row by row; the centre tap carries the largest weight.
static uint8_t fp_dn_kernal_3[KERNEL_SIZE * KERNEL_SIZE] = {
  1, 2, 1,  // top row
  2, 4, 2,  // middle row
  1, 2, 1   // bottom row
};
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610

// Estimate noise at a single point based on the impact of a spatial kernel
// on the point value.
// Reads the 3x3 neighbourhood centred on *src_ptr (so the rows above/below
// and the columns left/right must be readable) and returns the squared
// difference between the raw and the filtered centre value.
static int fp_estimate_point_noise(uint8_t *src_ptr, const int stride) {
  const int centre = (int)*src_ptr;
  const uint8_t *weight = fp_dn_kernal_3;
  const uint8_t *row = src_ptr - stride - 1;
  int weight_total = 0;
  int weighted_sum = 0;
  int largest_diff = 0;
  int filtered = centre;
  int delta;
  int r, c;

  // Apply the kernel: only neighbours within FP_DN_THRESH of the centre
  // value contribute to the weighted average.
  for (r = 0; r < KERNEL_SIZE; ++r, row += stride) {
    for (c = 0; c < KERNEL_SIZE; ++c, ++weight) {
      const int sample = (int)row[c];
      const int diff = abs(centre - sample);

      largest_diff = VPXMAX(largest_diff, diff);
      if (diff <= FP_DN_THRESH) {
        weight_total += *weight;
        weighted_sum += sample * (int)*weight;
      }
    }
  }

  // Only use the filtered value when no neighbour differs from the centre by
  // FP_MAX_DN_THRESH or more; otherwise keep the raw value (zero noise).
  // The centre tap always contributes, so weight_total is never 0 here.
  if (largest_diff < FP_MAX_DN_THRESH) {
    filtered = (uint8_t)((weighted_sum + (weight_total >> 1)) / weight_total);
  }

  // Return the noise energy as the square of the difference between the
  // denoised and raw value.
  delta = centre - filtered;
  return delta * delta;
}
#if CONFIG_VP9_HIGHBITDEPTH
static int fp_highbd_estimate_point_noise(uint8_t *src_ptr, const int stride) {
  int sum_weight = 0;
  int sum_val = 0;
  int i, j;
  int max_diff = 0;
  int diff;
  int dn_diff;
  uint8_t *tmp_ptr;
  uint16_t *tmp_ptr16;
  uint8_t *kernal_ptr;
631
  uint16_t dn_val;
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653
  uint16_t centre_val = *CONVERT_TO_SHORTPTR(src_ptr);

  kernal_ptr = fp_dn_kernal_3;

  // Apply the kernal
  tmp_ptr = src_ptr - stride - 1;
  for (i = 0; i < KERNEL_SIZE; ++i) {
    tmp_ptr16 = CONVERT_TO_SHORTPTR(tmp_ptr);
    for (j = 0; j < KERNEL_SIZE; ++j) {
      diff = abs((int)centre_val - (int)tmp_ptr16[j]);
      max_diff = VPXMAX(max_diff, diff);
      if (diff <= FP_DN_THRESH) {
        sum_weight += *kernal_ptr;
        sum_val += (int)tmp_ptr16[j] * (int)*kernal_ptr;
      }
      ++kernal_ptr;
    }
    tmp_ptr += stride;
  }

  if (max_diff < FP_MAX_DN_THRESH)
    // Update the source value with the new filtered value
654
    dn_val = (sum_val + (sum_weight >> 1)) / sum_weight;
655
  else
656
    dn_val = *CONVERT_TO_SHORTPTR(src_ptr);
657 658 659

  // return the noise energy as the square of the difference between the
  // denoised and raw value.
660
  dn_diff = (int)(*CONVERT_TO_SHORTPTR(src_ptr)) - (int)dn_val;
661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
  return dn_diff * dn_diff;
}
#endif

// Estimate noise for a block.
// Sums the point noise estimate over a subsampled set of luma source pixels
// (plane 0 of `x`) and scales the sum by 4 to compensate for the sampling.
// NOTE(review): the read pointer advances one pixel per sample while the
// column counter advances two, and the row step is (stride - width), so the
// visited pixels do not form a regular every-other-pixel grid. Preserved as
// is - confirm whether this is intended.
static int fp_estimate_block_noise(MACROBLOCK *x, BLOCK_SIZE bsize) {
#if CONFIG_VP9_HIGHBITDEPTH
  MACROBLOCKD *xd = &x->e_mbd;
  const int is_highbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
#endif
  const int stride = x->plane[0].src.stride;
  const int width = num_4x4_blocks_wide_lookup[bsize] * 4;
  const int height = num_4x4_blocks_high_lookup[bsize] * 4;
  uint8_t *sample = &x->plane[0].src.buf[0];
  int noise_sum = 0;
  int row, col;

  // Sampled points to reduce cost overhead.
  for (row = 0; row < height; row += 2, sample += (stride - width)) {
    for (col = 0; col < width; col += 2, ++sample) {
#if CONFIG_VP9_HIGHBITDEPTH
      noise_sum += is_highbd ? fp_highbd_estimate_point_noise(sample, stride)
                             : fp_estimate_point_noise(sample, stride);
#else
      noise_sum += fp_estimate_point_noise(sample, stride);
#endif
    }
  }

  return noise_sum << 2;  // Scale << 2 to account for sampling.
}

695 696
// This function is called to test the functionality of row based
// multi-threading in unit tests for bit-exactness
697 698 699
static void accumulate_floating_point_stats(VP9_COMP *cpi,
                                            TileDataEnc *first_tile_col) {
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
700
  int mb_row, mb_col;
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
  first_tile_col->fp_data.intra_factor = 0;
  first_tile_col->fp_data.brightness_factor = 0;
  first_tile_col->fp_data.neutral_count = 0;
  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
      const int mb_index = mb_row * cm->mb_cols + mb_col;
      first_tile_col->fp_data.intra_factor +=
          cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor;
      first_tile_col->fp_data.brightness_factor +=
          cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor;
      first_tile_col->fp_data.neutral_count +=
          cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count;
    }
  }
}

// Converts the raw per-frame accumulators in `fp_acc_data` into the
// normalized per-frame stats record `fps`. Note that `fp_acc_data` is also
// modified: its start row and intra skip count are clamped, and its intra
// and brightness factors are turned into per-MB averages.
static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
                                 FIRSTPASS_DATA *fp_acc_data) {
  VP9_COMMON *const cm = &cpi->common;
  FIRSTPASS_DATA *const acc = fp_acc_data;
  // The minimum error here ensures some bit allocation to frames even
  // in static regions. The allocation per MB declines for larger formats
  // where the typical "real" energy per MB also falls.
  // Initial estimate here uses sqrt(mbs) to define the min_err, where the
  // number of mbs is proportional to the image area.
  const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
                                                             : cpi->common.MBs;
  const double mb_count = (double)num_mbs;
  const double min_err = 200 * sqrt(num_mbs);
  const int half_rows = cm->mb_rows / 2;

  // Clamp the image start to rows/2. This number of rows is discarded top
  // and bottom as dead data so rows / 2 means the frame is blank.
  if (acc->image_data_start_row > half_rows ||
      acc->image_data_start_row == INVALID_ROW) {
    acc->image_data_start_row = half_rows;
  }
  // Exclude any image dead zone
  if (acc->image_data_start_row > 0) {
    acc->intra_skip_count =
        VPXMAX(0, acc->intra_skip_count -
                      (acc->image_data_start_row * cm->mb_cols * 2));
  }

  // Frame weight: product of the per-MB average intra and brightness factors.
  acc->intra_factor = acc->intra_factor / mb_count;
  acc->brightness_factor = acc->brightness_factor / mb_count;
  fps->weight = acc->intra_factor * acc->brightness_factor;

  fps->frame = cm->current_video_frame;
  fps->spatial_layer_id = cpi->svc.spatial_layer_id;
  fps->count = 1.0;

  // Per-MB errors: accumulated error >> 8, plus the minimum error floor.
  fps->coded_error = ((double)(acc->coded_error >> 8) + min_err) / mb_count;
  fps->sr_coded_error =
      ((double)(acc->sr_coded_error >> 8) + min_err) / mb_count;
  fps->intra_error = ((double)(acc->intra_error >> 8) + min_err) / mb_count;
  fps->frame_noise_energy = (double)(acc->frame_noise_energy) / mb_count;

  // Block counts expressed as a fraction of all MBs.
  fps->pcnt_inter = (double)(acc->intercount) / mb_count;
  fps->pcnt_second_ref = (double)(acc->second_ref_count) / mb_count;
  fps->pcnt_neutral = (double)(acc->neutral_count) / mb_count;
  fps->pcnt_intra_low = (double)(acc->intra_count_low) / mb_count;
  fps->pcnt_intra_high = (double)(acc->intra_count_high) / mb_count;
  fps->intra_skip_pct = (double)(acc->intra_skip_count) / mb_count;
  fps->intra_smooth_pct = (double)(acc->intra_smooth_count) / mb_count;

  fps->inactive_zone_rows = (double)(acc->image_data_start_row);
  // Currently set to 0 as most issues relate to letter boxing.
  fps->inactive_zone_cols = (double)0;

  // Without any motion vectors all motion stats are zero.
  if (acc->mvcount <= 0) {
    fps->MVr = 0.0;
    fps->mvr_abs = 0.0;
    fps->MVc = 0.0;
    fps->mvc_abs = 0.0;
    fps->MVrv = 0.0;
    fps->MVcv = 0.0;
    fps->mv_in_out_count = 0.0;
    fps->pcnt_motion = 0.0;
    return;
  }

  {
    const double mv_count = (double)acc->mvcount;

    // Mean and mean absolute motion vector components.
    fps->MVr = (double)(acc->sum_mvr) / mv_count;
    fps->mvr_abs = (double)(acc->sum_mvr_abs) / mv_count;
    fps->MVc = (double)(acc->sum_mvc) / mv_count;
    fps->mvc_abs = (double)(acc->sum_mvc_abs) / mv_count;

    // Variance of the components: (sum of squares - sum^2 / n) / n.
    fps->MVrv = ((double)(acc->sum_mvrs) -
                 ((double)(acc->sum_mvr) * (acc->sum_mvr) / mv_count)) /
                mv_count;
    fps->MVcv = ((double)(acc->sum_mvcs) -
                 ((double)(acc->sum_mvc) * (acc->sum_mvc) / mv_count)) /
                mv_count;

    fps->mv_in_out_count =
        (double)(acc->sum_in_vectors) / (acc->mvcount * 2);
    fps->pcnt_motion = (double)(acc->mvcount) / mb_count;
  }
}

// Folds the first-pass statistics collected in fp_acc_data into the running
// totals stored in this_tile->fp_data.
//   this_tile   - tile whose fp_data totals are updated in place.
//   fp_acc_data - statistics to add; this function only reads from it.
static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile,
                                      FIRSTPASS_DATA *fp_acc_data) {
  FIRSTPASS_DATA *const tile_stats = &this_tile->fp_data;
  const int tile_start_row = tile_stats->image_data_start_row;
  const int acc_start_row = fp_acc_data->image_data_start_row;
  const int lower_row = VPXMIN(tile_start_row, acc_start_row);
  const int upper_row = VPXMAX(tile_start_row, acc_start_row);

  // Weighting factors.
  tile_stats->intra_factor += fp_acc_data->intra_factor;
  tile_stats->brightness_factor += fp_acc_data->brightness_factor;

  // Error and noise energy sums.
  tile_stats->coded_error += fp_acc_data->coded_error;
  tile_stats->sr_coded_error += fp_acc_data->sr_coded_error;
  tile_stats->frame_noise_energy += fp_acc_data->frame_noise_energy;
  tile_stats->intra_error += fp_acc_data->intra_error;

  // Block classification counts.
  tile_stats->intercount += fp_acc_data->intercount;
  tile_stats->second_ref_count += fp_acc_data->second_ref_count;
  tile_stats->neutral_count += fp_acc_data->neutral_count;
  tile_stats->intra_count_low += fp_acc_data->intra_count_low;
  tile_stats->intra_count_high += fp_acc_data->intra_count_high;
  tile_stats->intra_skip_count += fp_acc_data->intra_skip_count;
  tile_stats->intra_smooth_count += fp_acc_data->intra_smooth_count;

  // Motion vector sums.
  tile_stats->mvcount += fp_acc_data->mvcount;
  tile_stats->sum_mvr += fp_acc_data->sum_mvr;
  tile_stats->sum_mvr_abs += fp_acc_data->sum_mvr_abs;
  tile_stats->sum_mvc += fp_acc_data->sum_mvc;
  tile_stats->sum_mvc_abs += fp_acc_data->sum_mvc_abs;
  tile_stats->sum_mvrs += fp_acc_data->sum_mvrs;
  tile_stats->sum_mvcs += fp_acc_data->sum_mvcs;
  tile_stats->sum_in_vectors += fp_acc_data->sum_in_vectors;

  // If the smaller of the two start rows is INVALID_ROW, at least one side
  // has no start row recorded, so keep the larger value. Otherwise keep the
  // smaller (earlier) row.
  // NOTE(review): presumably INVALID_ROW compares below every valid row
  // index - confirm against its definition.
  if (lower_row == INVALID_ROW) {
    tile_stats->image_data_start_row = upper_row;
  } else {
    tile_stats->image_data_start_row = lower_row;
  }
}

void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
                                       FIRSTPASS_DATA *fp_acc_data,
                                       TileDataEnc *tile_data, MV *best_ref_mv,
                                       int mb_row) {
  int mb_col;
  MACROBLOCK *const x = &td->mb;
838
  VP9_COMMON *const cm = &cpi->common;
839
  MACROBLOCKD *const xd = &x->e_mbd;
840
  TileInfo tile = tile_data->tile_info;
841 842
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
843 844 845
  const PICK_MODE_CONTEXT *ctx = &td->pc_root->none;
  int i, c;
  int num_mb_cols = get_num_cols(tile_data->tile_info, 1);
John Koleszar's avatar
John Koleszar committed
846 847

  int recon_yoffset, recon_uvoffset;
848
  const int intrapenalty = INTRA_MODE_PENALTY;
849
  const MV zero_mv = { 0, 0 };
850 851 852 853 854
  int recon_y_stride, recon_uv_stride, uv_mb_height;

  YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
  YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
855
  const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
856

857 858 859
  LAYER_CONTEXT *const lc =
      is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id]
                           : NULL;
860
  MODE_INFO mi_above, mi_left;
861

862 863 864 865
  double mb_intra_factor;
  double mb_brightness_factor;
  double mb_neutral_count;

866 867 868 869
  // First pass code requires valid last and new frame buffers.
  assert(new_yv12 != NULL);
  assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));

870
  if (lc != NULL) {
871 872
    // Use either last frame or alt frame for motion search.
    if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
873 874 875
      first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
      if (first_ref_buf == NULL)
        first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME);
876 877 878
    }

    if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
879 880 881 882
      gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
      if (gld_yv12 == NULL) {
        gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
      }
883 884
    } else {
      gld_yv12 = NULL;
885 886 887
    }
  }

888 889 890
  xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) +
           (tile.mi_col_start >> 1);
  xd->mi[0] = cm->mi + xd->mi_stride * (mb_row << 1) + (tile.mi_col_start >> 1);
John Koleszar's avatar
John Koleszar committed
891

892 893
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
894
    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
895
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
896
    p[i].eobs = ctx->eobs_pbuf[i][1];
897
  }
James Zern's avatar
James Zern committed
898

899 900 901 902
  recon_y_stride = new_yv12->y_stride;
  recon_uv_stride = new_yv12->uv_stride;
  uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);

903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923
  // Reset above block coeffs.
  recon_yoffset =
      (mb_row * recon_y_stride * 16) + (tile.mi_col_start >> 1) * 16;
  recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height) +
                   (tile.mi_col_start >> 1) * uv_mb_height;

  // Set up limit values for motion vectors to prevent them extending
  // outside the UMV borders.
  x->mv_limits.row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
  x->mv_limits.row_max =
      ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16;

  for (mb_col = tile.mi_col_start >> 1, c = 0; mb_col < (tile.mi_col_end >> 1);
       ++mb_col, c++) {
    int this_error;
    int this_intra_error;
    const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
    const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
    double log_intra;
    int level_sample;
    const int mb_index = mb_row * cm->mb_cols + mb_col;
John Koleszar's avatar
John Koleszar committed
924

925 926 927
#if CONFIG_FP_MB_STATS
    const int mb_index = mb_row * cm->mb_cols + mb_col;
#endif
John Koleszar's avatar
John Koleszar committed
928

929
    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c);
John Koleszar's avatar
John Koleszar committed
930

931 932 933 934 935 936 937 938 939
    // Adjust to the next column of MBs.
    x->plane[0].src.buf = cpi->Source->y_buffer +
                          mb_row * 16 * x->plane[0].src.stride + mb_col * 16;
    x->plane[1].src.buf = cpi->Source->u_buffer +
                          mb_row * uv_mb_height * x->plane[1].src.stride +
                          mb_col * uv_mb_height;
    x->plane[2].src.buf = cpi->Source->v_buffer +
                          mb_row * uv_mb_height * x->plane[1].src.stride +
                          mb_col * uv_mb_height;
940

941
    vpx_clear_system_state();
942

943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
    xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
    xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
    xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
    xd->mi[0]->sb_type = bsize;
    xd->mi[0]->ref_frame[0] = INTRA_FRAME;
    set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize],
                   mb_col << 1, num_8x8_blocks_wide_lookup[bsize], cm->mi_rows,
                   cm->mi_cols);
    // Are edges available for intra prediction?
    // Since the firstpass does not populate the mi_grid_visible,
    // above_mi/left_mi must be overwritten with a nonzero value when edges
    // are available.  Required by vp9_predict_intra_block().
    xd->above_mi = (mb_row != 0) ? &mi_above : NULL;
    xd->left_mi = ((mb_col << 1) > tile.mi_col_start) ? &mi_left : NULL;

    // Do intra 16x16 prediction.
    x->skip_encode = 0;
    x->fp_src_pred = 0;
    // Do intra prediction based on source pixels for tile boundaries
    if ((mb_col == (tile.mi_col_start >> 1)) && mb_col != 0) {
      xd->left_mi = &mi_left;
      x->fp_src_pred = 1;
    }
    xd->mi[0]->mode = DC_PRED;
    xd->mi[0]->tx_size =
        use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
    // Fix - zero the 16x16 block first. This ensures correct this_error for
    // block sizes smaller than 16x16.
    vp9_zero_array(x->plane[0].src_diff, 256);
    vp9_encode_intra_block_plane(x, bsize, 0, 0);
    this_error = vpx_get_mb_ss(x->plane[0].src_diff);
    this_intra_error = this_error;

    // Keep a record of blocks that have very low intra error residual
    // (i.e. are in effect completely flat and untextured in the intra
    // domain). In natural videos this is uncommon, but it is much more
    // common in animations, graphics and screen content, so may be used
    // as a signal to detect these types of content.
    if (this_error < get_ul_intra_threshold(cm)) {
      ++(fp_acc_data->intra_skip_count);
    } else if ((mb_col > 0) &&
               (fp_acc_data->image_data_start_row == INVALID_ROW)) {
      fp_acc_data->image_data_start_row = mb_row;
    }
987

988 989 990 991 992 993
    // Blocks that are mainly smooth in the intra domain.
    // Some special accounting for CQ but also these are better for testing
    // noise levels.
    if (this_error < get_smooth_intra_threshold(cm)) {
      ++(fp_acc_data->intra_smooth_count);
    }
994

995 996 997 998 999 1000
    // Special case noise measurement for first frame.
    if (cm->current_video_frame == 0) {
      if (this_intra_error < scale_sse_threshold(cm, LOW_I_THRESH)) {
        fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize);
      } else {
        fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF;
1001
      }
1002
    }
1003

1004
#if CONFIG_VP9_HIGHBITDEPTH
1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
    if (cm->use_highbitdepth) {
      switch (cm->bit_depth) {
        case VPX_BITS_8: break;
        case VPX_BITS_10: this_error >>= 4; break;
        case VPX_BITS_12: this_error >>= 8; break;
        default:
          assert(0 &&
                 "cm->bit_depth should be VPX_BITS_8, "
                 "VPX_BITS_10 or VPX_BITS_12");
          return;
1015
      }
1016
    }
1017
#endif  // CONFIG_VP9_HIGHBITDEPTH
1018

1019 1020 1021
    vpx_clear_system_state();
    log_intra = log(this_error + 1.0);
    if (log_intra < 10.0) {
1022 1023
      mb_intra_factor = 1.0 + ((10.0 - log_intra) * 0.05);
      fp_acc_data->intra_factor += mb_intra_factor;
1024
      if (cpi->row_mt_bit_exact)
1025 1026
        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor =
            mb_intra_factor;
1027 1028
    } else {
      fp_acc_data->intra_factor += 1.0;
1029
      if (cpi->row_mt_bit_exact)
1030
        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0;
1031
    }
1032 1033

#if CONFIG_VP9_HIGHBITDEPTH
1034 1035 1036
    if (cm->use_highbitdepth)
      level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0];
    else
1037
      level_sample = x->plane[0].src.buf[0];
1038 1039
#else
    level_sample = x->plane[0].src.buf[0];
1040
#endif
1041
    if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) {
1042 1043
      mb_brightness_factor = 1.0 + (0.01 * (DARK_THRESH - level_sample));
      fp_acc_data->brightness_factor += mb_brightness_factor;
1044
      if (cpi->row_mt_bit_exact)
1045 1046
        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
            mb_brightness_factor;
1047 1048
    } else {
      fp_acc_data->brightness_factor += 1.0;
1049
      if (cpi->row_mt_bit_exact)
1050 1051
        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
            1.0;
1052
    }
1053

1054 1055 1056 1057 1058 1059 1060 1061
    // Intrapenalty below deals with situations where the intra and inter
    // error scores are very low (e.g. a plain black frame).
    // We do not have special cases in first pass for 0,0 and nearest etc so
    // all inter modes carry an overhead cost estimate for the mv.
    // When the error score is very low this causes us to pick all or lots of
    // INTRA modes and throw lots of key frames.
    // This penalty adds a cost matching that of a 0,0 mv to the intra case.
    this_error += intrapenalty;
John Koleszar's avatar
John Koleszar committed
1062

1063 1064
    // Accumulate the intra error.
    fp_acc_data->intra_error += (int64_t)this_error;
John Koleszar's avatar
John Koleszar committed
1065

1066
#if CONFIG_FP_MB_STATS
1067 1068 1069 1070
    if (cpi->use_fp_mb_stats) {
      // initialization
      cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
    }
1071 1072
#endif

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087
    // Set up limit values for motion vectors to prevent them extending
    // outside the UMV borders.
    x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
    x->mv_limits.col_max =
        ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;

    // Other than for the first frame do a motion search.
    if ((lc == NULL && cm->current_video_frame > 0) ||
        (lc != NULL && lc->current_video_frame_in_layer > 0)) {
      int tmp_err, motion_error, raw_motion_error;
      // Assume 0,0 motion with no mv overhead.
      MV mv = { 0, 0 }, tmp_mv = { 0, 0 };
      struct buf_2d unscaled_last_source_buf_2d;

      xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
1088
#if CONFIG_VP9_HIGHBITDEPTH
1089 1090 1091 1092
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        motion_error = highbd_get_prediction_error(
            bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
      } else {
1093 1094
        motion_error =
            get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
1095 1096 1097 1098
      }
#else
      motion_error =
          get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
1099
#endif  // CONFIG_VP9_HIGHBITDEPTH
1100

1101 1102 1103 1104 1105 1106
      // Compute the motion error of the 0,0 motion using the last source
      // frame as the reference. Skip the further motion search on
      // reconstructed frame if this error is small.
      unscaled_last_source_buf_2d.buf =
          cpi->unscaled_last_source->y_buffer + recon_yoffset;
      unscaled_last_source_buf_2d.stride = cpi->unscaled_last_source->y_stride;
1107
#if CONFIG_VP9_HIGHBITDEPTH
1108 1109 1110 1111
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        raw_motion_error = highbd_get_prediction_error(
            bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
      } else {
1112 1113
        raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                                &unscaled_last_source_buf_2d);
1114 1115 1116 1117
      }
#else
      raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                              &unscaled_last_source_buf_2d);
1118
#endif  // CONFIG_VP9_HIGHBITDEPTH
1119

1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
      // TODO(pengchong): Replace the hard-coded threshold
      if (raw_motion_error > 25 || lc != NULL) {
        // Test last reference frame using the previous best mv as the
        // starting point (best reference) for the search.
        first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error);

        // If the current best reference mv is not centered on 0,0 then do a
        // 0,0 based search as well.
        if (!is_zero_mv(best_ref_mv)) {
          tmp_err = INT_MAX;
          first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &tmp_err);

          if (tmp_err < motion_error) {
            motion_error = tmp_err;
            mv = tmp_mv;
John Koleszar's avatar
John Koleszar committed
1135
          }
1136
        }
John Koleszar's avatar
John Koleszar committed
1137

1138 1139 1140 1141 1142 1143
        // Search in an older reference frame.
        if (((lc == NULL && cm->current_video_frame > 1) ||
             (lc != NULL && lc->current_video_frame_in_layer > 1)) &&
            gld_yv12 != NULL) {
          // Assume 0,0 motion with no mv overhead.
          int gf_motion_error;
1144

1145
          xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
1146
#if CONFIG_VP9_HIGHBITDEPTH
1147 1148 1149 1150
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            gf_motion_error = highbd_get_prediction_error(
                bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
          } else {
1151 1152
            gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                                   &xd->plane[0].pre[0]);
1153 1154 1155 1156
          }
#else
          gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                                 &xd->plane[0].pre[0]);
1157
#endif  // CONFIG_VP9_HIGHBITDEPTH
1158

1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
          first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error);

          if (gf_motion_error < motion_error && gf_motion_error < this_error)
            ++(fp_acc_data->second_ref_count);

          // Reset to last frame as reference buffer.
          xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
          xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
          xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;

          // In accumulating a score for the older reference frame take the
          // best of the motion predicted score and the intra coded error
          // (just as will be done for) accumulation of "coded_error" for
          // the last frame.
          if (gf_motion_error < this_error)
            fp_acc_data->sr_coded_error += gf_motion_error;
          else
            fp_acc_data->sr_coded_error += this_error;
1177
        } else {
1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210
          fp_acc_data->sr_coded_error += motion_error;
        }
      } else {
        fp_acc_data->sr_coded_error += motion_error;
      }

      // Start by assuming that intra mode is best.
      best_ref_mv->row = 0;
      best_ref_mv->col = 0;

#if CONFIG_FP_MB_STATS
      if (cpi->use_fp_mb_stats) {
        // intra prediction statistics
        cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
        cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK;
        cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
        if (this_error > FPMB_ERROR_LARGE_TH) {
          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;
        } else if (this_error < FPMB_ERROR_SMALL_TH) {
          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;
        }
      }
#endif

      if (motion_error <= this_error) {
        vpx_clear_system_state();

        // Keep a count of cases where the inter and intra were very close
        // and very low. This helps with scene cut detection for example in
        // cropped clips with black bars at the sides or top and bottom.
        if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
            (this_error < (2 * intrapenalty))) {
          fp_acc_data->neutral_count += 1.0;
1211
          if (cpi->row_mt_bit_exact)
1212 1213
            cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
                1.0;
1214 1215 1216 1217
          // Also track cases where the intra is not much worse than the inter
          // and use this in limiting the GF/arf group length.
        } else if ((this_error > NCOUNT_INTRA_THRESH) &&
                   (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
1218
          mb_neutral_count =
1219
              (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
1220
          fp_acc_data->neutral_count += mb_neutral_count;
1221
          if (cpi->row_mt_bit_exact)
1222 1223
            cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
                mb_neutral_count;
1224
        }
1225

1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244
        mv.row *= 8;
        mv.col *= 8;
        this_error = motion_error;
        xd->mi[0]->mode = NEWMV;
        xd->mi[0]->mv[0].as_mv = mv;
        xd->mi[0]->tx_size = TX_4X4;
        xd->mi[0]->ref_frame[0] = LAST_FRAME;
        xd->mi[0]->ref_frame[1] = NONE;
        vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
        vp9_encode_sby_pass1(x, bsize);
        fp_acc_data->sum_mvr += mv.row;
        fp_acc_data->sum_mvr_abs += abs(mv.row);
        fp_acc_data->sum_mvc += mv.col;
        fp_acc_data->sum_mvc_abs += abs(mv.col);
        fp_acc_data->sum_mvrs += mv.row * mv.row;
        fp_acc_data->sum_mvcs += mv.col * mv.col;
        ++(fp_acc_data->intercount);

        *best_ref_mv = mv;
John Koleszar's avatar
John Koleszar committed
1245

1246 1247
#if CONFIG_FP_MB_STATS
        if (cpi->use_fp_mb_stats) {
1248
          // inter prediction statistics
1249
          cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
1250
          cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK;
1251 1252 1253 1254 1255
          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
          if (this_error > FPMB_ERROR_LARGE_TH) {
            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;
          } else if (this_error < FPMB_ERROR_SMALL_TH) {
            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;
1256 1257 1258 1259
          }
        }
#endif

1260 1261
        if (!is_zero_mv(&mv)) {
          ++(fp_acc_data->mvcount);
John Koleszar's avatar
John Koleszar committed
1262

1263 1264
#if CONFIG_FP_MB_STATS
          if (cpi->use_fp_mb_stats) {
1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277
            cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_MOTION_ZERO_MASK;
            // check estimated motion direction
            if (mv.as_mv.col > 0 && mv.as_mv.col >= abs(mv.as_mv.row)) {
              // right direction
              cpi->twopass.frame_mb_stats_buf[mb_index] |=
                  FPMB_MOTION_RIGHT_MASK;
            } else if (mv.as_mv.row < 0 &&
                       abs(mv.as_mv.row) >= abs(mv.as_mv.col)) {
              // up direction
              cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_UP_MASK;
            } else if (mv.as_mv.col < 0 &&
                       abs(mv.as_mv.col) >= abs(mv.as_mv.row)) {
              // left direction
1278
              cpi->twopass.frame_mb_stats_buf[mb_index] |=
1279 1280 1281
                  FPMB_MOTION_LEFT_MASK;
            } else {
              // down direction
1282
              cpi->twopass.frame_mb_stats_buf[mb_index] |=
1283
                  FPMB_MOTION_DOWN_MASK;
1284
            }
1285 1286 1287
          }
#endif

1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319
          // Does the row vector point inwards or outwards?
          if (mb_row < cm->mb_rows / 2) {
            if (mv.row > 0)
              --(fp_acc_data->sum_in_vectors);
            else if (mv.row < 0)
              ++(fp_acc_data->sum_in_vectors);
          } else if (mb_row > cm->mb_rows / 2) {
            if (mv.row > 0)
              ++(fp_acc_data->sum_in_vectors);
            else if (mv.row < 0)
              --(fp_acc_data->sum_in_vectors);
          }

          // Does the col vector point inwards or outwards?
          if (mb_col < cm->mb_cols / 2) {
            if (mv.col > 0)
              --(fp_acc_data->sum_in_vectors);
            else if (mv.col < 0)
              ++(fp_acc_data->sum_in_vectors);
          } else if (mb_col > cm->mb_cols / 2) {
            if (mv.col > 0)
              ++(fp_acc_data->sum_in_vectors);
            else if (mv.col < 0)
              --(fp_acc_data->sum_in_vectors);
          }
          fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF;
        } else if (this_intra_error < scale_sse_threshold(cm, LOW_I_THRESH)) {
          fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize);
        } else {  // 0,0 mv but high error
          fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF;
        }
      } else {  // Intra < inter error
1320 1321
        int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH);
        if (this_intra_error < scaled_low_intra_thresh) {
1322
          fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize);
1323 1324 1325 1326 1327
          if (motion_error < scaled_low_intra_thresh) {
            fp_acc_data->intra_count_low += 1.0;
          } else {
            fp_acc_data->intra_count_high += 1.0;
          }
1328
        } else {
1329
          fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF;
1330 1331
          fp_acc_data->intra_count_high += 1.0;
        }
1332 1333 1334 1335 1336 1337 1338 1339 1340 1341
      }
    } else {
      fp_acc_data->sr_coded_error += (int64_t)this_error;
    }
    fp_acc_data->coded_error += (int64_t)this_error;

    recon_yoffset += 16;
    recon_uvoffset += uv_mb_height;

    // Accumulate row level stats to the corresponding tile stats
1342
    if (cpi->row_mt && mb_col == (tile.mi_col_end >> 1) - 1)
1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393
      accumulate_fp_mb_row_stat(tile_data, fp_acc_data);

    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, mb_row, c,