vp9_temporal_filter.c 28.2 KB
Newer Older
Johann's avatar
Johann committed
1 2 3 4 5 6 7 8 9 10
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include <assert.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
12 13
#include <math.h>
#include <limits.h>
Johann's avatar
Johann committed
14

15
#include "vp9/common/vp9_alloccommon.h"
Johann's avatar
Johann committed
16
#include "vp9/common/vp9_common.h"
17
#include "vp9/common/vp9_onyxc_int.h"
18
#include "vp9/common/vp9_quant_common.h"
19
#include "vp9/common/vp9_reconinter.h"
20
#include "vp9/encoder/vp9_encodeframe.h"
21
#include "vp9/encoder/vp9_ethread.h"
22
#include "vp9/encoder/vp9_extend.h"
23
#include "vp9/encoder/vp9_firstpass.h"
24
#include "vp9/encoder/vp9_mcomp.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
25
#include "vp9/encoder/vp9_encoder.h"
26
#include "vp9/encoder/vp9_quantize.h"
27 28
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
James Zern's avatar
James Zern committed
29
#include "vp9/encoder/vp9_temporal_filter.h"
30
#include "vpx_dsp/vpx_dsp_common.h"
Johann's avatar
Johann committed
31
#include "vpx_mem/vpx_mem.h"
32
#include "vpx_ports/mem.h"
Johann's avatar
Johann committed
33
#include "vpx_ports/vpx_timer.h"
34
#include "vpx_scale/vpx_scale.h"
Johann's avatar
Johann committed
35

36 37
static int fixed_divide[512];

clang-format's avatar
clang-format committed
38 39 40 41
// Builds the motion-compensated predictors for one 16x16 macroblock,
// writing them contiguously into |pred|: Y at pred[0] (16x16), U at
// pred[256] and V at pred[512] (uv_block_width x uv_block_height each).
// |mv_row|/|mv_col| are in 1/8-pel units; |x|,|y| give the block position
// in pixels for the scaled-prediction path.
static void temporal_filter_predictors_mb_c(
    MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
    int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
    uint8_t *pred, struct scale_factors *scale, int x, int y) {
  const int which_mv = 0;
  const MV mv = { mv_row, mv_col };
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP];

  enum mv_precision mv_precision_uv;
  int uv_stride;
  // An 8-wide chroma block implies 2x chroma subsampling: halve the
  // stride (rounding up) and use Q4 precision so the chroma MV becomes
  // half of the luma MV.  Otherwise chroma is full-size and uses the same
  // Q3 precision as luma.
  if (uv_block_width == 8) {
    uv_stride = (stride + 1) >> 1;
    mv_precision_uv = MV_PRECISION_Q4;
  } else {
    uv_stride = stride;
    mv_precision_uv = MV_PRECISION_Q3;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth buffers carry 16-bit samples behind a converted pointer;
  // build all three planes with the highbd predictor and return early.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride,
                                     CONVERT_TO_SHORTPTR(&pred[0]), 16, &mv,
                                     scale, 16, 16, which_mv, kernel,
                                     MV_PRECISION_Q3, x, y, xd->bd);

    vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride,
                                     CONVERT_TO_SHORTPTR(&pred[256]),
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, kernel,
                                     mv_precision_uv, x, y, xd->bd);

    vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride,
                                     CONVERT_TO_SHORTPTR(&pred[512]),
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, kernel,
                                     mv_precision_uv, x, y, xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // |xd| is only consulted on the high-bitdepth path above.
  (void)xd;
  vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
                            which_mv, kernel, MV_PRECISION_Q3, x, y);

  vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            which_mv, kernel, mv_precision_uv, x, y);

  vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            which_mv, kernel, mv_precision_uv, x, y);
}
89

90
// Builds the fixed-point reciprocal table used when normalizing the filter
// output: fixed_divide[i] approximates (1 << 19) / i, so a later
// (x * fixed_divide[i]) >> 19 stands in for a per-pixel division by i.
void vp9_temporal_filter_init(void) {
  int divisor;

  // Index 0 has no reciprocal; keep that entry cleared.
  fixed_divide[0] = 0;
  for (divisor = 1; divisor < 512; ++divisor) {
    fixed_divide[divisor] = 0x80000 / divisor;
  }
}

97 98 99
// Accumulates one frame's weighted contribution to the temporal filter.
// For every pixel of the |block_width| x |block_height| predictor block
// |frame2|, a non-local-mean style modifier is derived from the squared
// differences against the source |frame1| over the 3x3 neighborhood that
// lies inside the block; the modifier (0..16, larger = better match) is
// scaled by |filter_weight| and added to |accumulator| (sum of
// modifier * pixel) and |count| (sum of modifiers).
void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
                                 const uint8_t *frame2,
                                 unsigned int block_width,
                                 unsigned int block_height, int strength,
                                 int filter_weight, uint32_t *accumulator,
                                 uint16_t *count) {
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
  unsigned int row, col;
  unsigned int k = 0;   // linear index into accumulator/count
  int src_offset = 0;   // offset of the current pixel within frame1

  assert(strength >= 0);
  assert(strength <= 6);

  assert(filter_weight >= 0);
  assert(filter_weight <= 2);

  for (row = 0; row < block_height; row++) {
    for (col = 0; col < block_width; col++, k++) {
      const int pixel_value = *frame2;
      int sum_sse = 0;
      int num_valid = 0;
      int dy, dx;
      int modifier;

      // Non-local mean: sum squared differences over the 3x3 neighborhood
      // clipped to the block bounds.
      for (dy = -1; dy <= 1; ++dy) {
        for (dx = -1; dx <= 1; ++dx) {
          const int r = (int)row + dy;
          const int c = (int)col + dx;

          if (r >= 0 && r < (int)block_height && c >= 0 &&
              c < (int)block_width) {
            const int diff = frame1[src_offset + dy * (int)stride + dx] -
                             frame2[dy * (int)block_width + dx];
            sum_sse += diff * diff;
            ++num_valid;
          }
        }
      }

      assert(num_valid > 0);

      // Average SSE, weighted by 3 to sharpen the falloff.
      modifier = sum_sse * 3 / num_valid;

      ++frame2;

      // Rounded shift by |strength|, capped so the match score stays in
      // [0, 16] before inversion and weighting.
      modifier += rounding;
      modifier >>= strength;

      if (modifier > 16) modifier = 16;

      modifier = 16 - modifier;
      modifier *= filter_weight;

      count[k] += modifier;
      accumulator[k] += modifier * pixel_value;

      src_offset++;
    }

    src_offset += stride - block_width;
  }
}

165
#if CONFIG_VP9_HIGHBITDEPTH
clang-format's avatar
clang-format committed
166
// High-bitdepth twin of vp9_temporal_filter_apply_c: identical non-local
// mean accumulation, but the byte pointers wrap 16-bit sample buffers and
// must be unwrapped with CONVERT_TO_SHORTPTR before use.
void vp9_highbd_temporal_filter_apply_c(
    const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8,
    unsigned int block_width, unsigned int block_height, int strength,
    int filter_weight, uint32_t *accumulator, uint16_t *count) {
  const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
  const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
  unsigned int row, col;
  unsigned int k = 0;   // linear index into accumulator/count
  int src_offset = 0;   // offset of the current pixel within frame1

  for (row = 0; row < block_height; row++) {
    for (col = 0; col < block_width; col++, k++) {
      const int pixel_value = *frame2;
      int sum_sse = 0;
      int num_valid = 0;
      int dy, dx;
      int modifier;

      // Sum squared differences over the in-bounds 3x3 neighborhood.
      for (dy = -1; dy <= 1; ++dy) {
        for (dx = -1; dx <= 1; ++dx) {
          const int r = (int)row + dy;
          const int c = (int)col + dx;

          if (r >= 0 && r < (int)block_height && c >= 0 &&
              c < (int)block_width) {
            const int diff = frame1[src_offset + dy * (int)stride + dx] -
                             frame2[dy * (int)block_width + dx];
            sum_sse += diff * diff;
            ++num_valid;
          }
        }
      }
      assert(num_valid > 0);

      modifier = sum_sse * 3 / num_valid;

      ++frame2;
      // Rounded shift by |strength|; clamp the score into [0, 16].
      modifier += rounding;
      modifier >>= strength;

      if (modifier > 16) modifier = 16;

      modifier = 16 - modifier;
      modifier *= filter_weight;

      count[k] += modifier;
      accumulator[k] += modifier * pixel_value;

      src_offset++;
    }

    src_offset += stride - block_width;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

225
// Motion-search helper: finds the best 16x16 match for the ARF source
// block |arf_frame_buf| inside the candidate frame |frame_ptr_buf| (both
// addressed with row stride |stride| -- NOTE(review): assumes the two
// frames share a stride; confirm at call sites).  On return *ref_mv holds
// the sub-pel refined motion vector and the refined error score is
// returned (lower = better match).
static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
                                                   ThreadData *td,
                                                   uint8_t *arf_frame_buf,
                                                   uint8_t *frame_ptr_buf,
                                                   int stride, MV *ref_mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const SEARCH_METHODS search_method = HEX;
  int step_param;
  int sadpb = x->sadperbit16;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];
  const MvLimits tmp_mv_limits = x->mv_limits;

  // The search is always centered on the zero vector (1/8-pel units).
  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  // Save input state: the search below repoints the src/pre buffers.
  struct buf_2d src = x->plane[0].src;
  struct buf_2d pre = xd->plane[0].pre[0];

  // Convert from 1/8-pel to full-pel units for the integer search stage.
  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = arf_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = frame_ptr_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  // Full-pel stage using the HEX search pattern.
  vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
                        search_method, sadpb, cond_cost_list(cpi, cost_list),
                        &best_ref_mv1, ref_mv, 0, 0);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
      x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0,
      mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL,
      &distortion, &sse, NULL, 0, 0);

  // Restore input state
  x->plane[0].src = src;
  xd->plane[0].pre[0] = pre;

  return bestsme;
}

284 285 286
// Temporally filters one row of 16x16 macroblocks (columns
// [mb_col_start, mb_col_end) of row |mb_row|) and writes the normalized
// result into cpi->alt_ref_buffer.
//
// For each macroblock, every frame in the ARNR group contributes a
// motion-compensated, per-pixel weighted prediction to
// |accumulator|/|count| (weight 2/1/0 chosen from the motion-search error);
// the accumulated sums are then normalized with the fixed_divide
// reciprocal table: pval = (acc + cnt/2) * (2^19 / cnt) >> 19, i.e. a
// rounded weighted average.
void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
                                       int mb_row, int mb_col_start,
                                       int mb_col_end) {
  ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data;
  YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames;
  int frame_count = arnr_filter_data->frame_count;
  int alt_ref_index = arnr_filter_data->alt_ref_index;
  // Group-level strength; per-MB adjustments are made on a local copy
  // inside the column loop (see |mb_strength|).
  const int strength = arnr_filter_data->strength;
  struct scale_factors *scale = &arnr_filter_data->sf;
  int byte;
  int frame;
  int mb_col;
  unsigned int filter_weight;
  int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
  int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
  DECLARE_ALIGNED(16, uint32_t, accumulator[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
  MACROBLOCKD *mbd = &td->mb.e_mbd;
  YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
  uint8_t *dst1, *dst2;
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
  uint8_t *predictor;
#else
  DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
#endif
  const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
  const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
  // Addition of the tile col level offsets
  int mb_y_offset = mb_row * 16 * (f->y_stride) + 16 * mb_col_start;
  int mb_uv_offset =
      mb_row * mb_uv_height * f->uv_stride + mb_uv_width * mb_col_start;

#if CONFIG_VP9_HIGHBITDEPTH
  if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    predictor = CONVERT_TO_BYTEPTR(predictor16);
  } else {
    predictor = predictor8;
  }
#endif

  // Source frames are extended to 16 pixels. This is different than
  //  L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
  // A 6/8 tap filter is used for motion search.  This requires 2 pixels
  //  before and 3 pixels after.  So the largest Y mv on a border would
  //  then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
  //  Y and therefore only extended by 8.  The largest mv that a UV block
  //  can support is 8 - VP9_INTERP_EXTEND.  A UV mv is half of a Y mv.
  //  (16 - VP9_INTERP_EXTEND) >> 1 which is greater than
  //  8 - VP9_INTERP_EXTEND.
  // To keep the mv in play for both Y and UV planes the max that it
  //  can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
  td->mb.mv_limits.row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
  td->mb.mv_limits.row_max =
      ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND);

  for (mb_col = mb_col_start; mb_col < mb_col_end; mb_col++) {
    int i, j, k;
    int stride;
    MV ref_mv;
    // Strength actually used for this macroblock.  Fix: the original code
    // decremented the shared |strength| variable in the FILM branch below,
    // so one low-variance block's reduction leaked into -- and compounded
    // across -- every later macroblock in the row.  A per-MB copy keeps
    // the adjustment local.
    int mb_strength = strength;

    vp9_zero_array(accumulator, 16 * 16 * 3);
    vp9_zero_array(count, 16 * 16 * 3);

    td->mb.mv_limits.col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
    td->mb.mv_limits.col_max =
        ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND);

    // For film content, back the filter strength off on near-static (very
    // low variance) source blocks.
    if (cpi->oxcf.content == VP9E_CONTENT_FILM) {
      unsigned int src_variance;
      struct buf_2d src;

      src.buf = f->y_buffer + mb_y_offset;
      src.stride = f->y_stride;

#if CONFIG_VP9_HIGHBITDEPTH
      if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        src_variance =
            vp9_high_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16, mbd->bd);
      } else {
        src_variance = vp9_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16);
      }
#else
      src_variance = vp9_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      if (src_variance <= 2) mb_strength = VPXMAX(0, strength - 2);
    }

    for (frame = 0; frame < frame_count; frame++) {
      const uint32_t thresh_low = 10000;
      const uint32_t thresh_high = 20000;

      if (frames[frame] == NULL) continue;

      ref_mv.row = 0;
      ref_mv.col = 0;

      if (frame == alt_ref_index) {
        // The ARF itself always receives the maximum weight.
        filter_weight = 2;
      } else {
        // Find best match in this frame by MC
        uint32_t err = temporal_filter_find_matching_mb_c(
            cpi, td, frames[alt_ref_index]->y_buffer + mb_y_offset,
            frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride,
            &ref_mv);

        // Assign higher weight to matching MB if its error
        // score is lower. If not applying MC default behavior
        // is to weight all MBs equal.
        filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
      }

      if (filter_weight != 0) {
        // Construct the predictors
        temporal_filter_predictors_mb_c(
            mbd, frames[frame]->y_buffer + mb_y_offset,
            frames[frame]->u_buffer + mb_uv_offset,
            frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
            mb_uv_width, mb_uv_height, ref_mv.row, ref_mv.col, predictor, scale,
            mb_col * 16, mb_row * 16);

#if CONFIG_VP9_HIGHBITDEPTH
        if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          // Scale strength with bit depth so the shift keeps the same
          // relative magnitude on deeper samples.
          int adj_strength = mb_strength + 2 * (mbd->bd - 8);
          // Apply the filter (YUV)
          vp9_highbd_temporal_filter_apply(
              f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
              adj_strength, filter_weight, accumulator, count);
          vp9_highbd_temporal_filter_apply(
              f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
              mb_uv_width, mb_uv_height, adj_strength, filter_weight,
              accumulator + 256, count + 256);
          vp9_highbd_temporal_filter_apply(
              f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
              mb_uv_width, mb_uv_height, adj_strength, filter_weight,
              accumulator + 512, count + 512);
        } else {
          // Apply the filter (YUV)
          vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
                                    predictor, 16, 16, mb_strength,
                                    filter_weight, accumulator, count);
          vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
                                    predictor + 256, mb_uv_width, mb_uv_height,
                                    mb_strength, filter_weight,
                                    accumulator + 256, count + 256);
          vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
                                    predictor + 512, mb_uv_width, mb_uv_height,
                                    mb_strength, filter_weight,
                                    accumulator + 512, count + 512);
        }
#else
        // Apply the filter (YUV)
        vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
                                  predictor, 16, 16, mb_strength,
                                  filter_weight, accumulator, count);
        vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
                                  predictor + 256, mb_uv_width, mb_uv_height,
                                  mb_strength, filter_weight,
                                  accumulator + 256, count + 256);
        vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
                                  predictor + 512, mb_uv_width, mb_uv_height,
                                  mb_strength, filter_weight,
                                  accumulator + 512, count + 512);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }
    }

#if CONFIG_VP9_HIGHBITDEPTH
    if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      uint16_t *dst1_16;
      uint16_t *dst2_16;
      // Normalize filter output to produce AltRef frame
      dst1 = cpi->alt_ref_buffer.y_buffer;
      dst1_16 = CONVERT_TO_SHORTPTR(dst1);
      stride = cpi->alt_ref_buffer.y_stride;
      byte = mb_y_offset;
      for (i = 0, k = 0; i < 16; i++) {
        for (j = 0; j < 16; j++, k++) {
          // Rounded division (acc + cnt/2) / cnt via the reciprocal table.
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;

          dst1_16[byte] = (uint16_t)pval;

          // move to next pixel
          byte++;
        }

        byte += stride - 16;
      }

      dst1 = cpi->alt_ref_buffer.u_buffer;
      dst2 = cpi->alt_ref_buffer.v_buffer;
      dst1_16 = CONVERT_TO_SHORTPTR(dst1);
      dst2_16 = CONVERT_TO_SHORTPTR(dst2);
      stride = cpi->alt_ref_buffer.uv_stride;
      byte = mb_uv_offset;
      for (i = 0, k = 256; i < mb_uv_height; i++) {
        for (j = 0; j < mb_uv_width; j++, k++) {
          int m = k + 256;

          // U
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;
          dst1_16[byte] = (uint16_t)pval;

          // V
          pval = accumulator[m] + (count[m] >> 1);
          pval *= fixed_divide[count[m]];
          pval >>= 19;
          dst2_16[byte] = (uint16_t)pval;

          // move to next pixel
          byte++;
        }

        byte += stride - mb_uv_width;
      }
    } else {
      // Normalize filter output to produce AltRef frame
      dst1 = cpi->alt_ref_buffer.y_buffer;
      stride = cpi->alt_ref_buffer.y_stride;
      byte = mb_y_offset;
      for (i = 0, k = 0; i < 16; i++) {
        for (j = 0; j < 16; j++, k++) {
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;

          dst1[byte] = (uint8_t)pval;

          // move to next pixel
          byte++;
        }
        byte += stride - 16;
      }

      dst1 = cpi->alt_ref_buffer.u_buffer;
      dst2 = cpi->alt_ref_buffer.v_buffer;
      stride = cpi->alt_ref_buffer.uv_stride;
      byte = mb_uv_offset;
      for (i = 0, k = 256; i < mb_uv_height; i++) {
        for (j = 0; j < mb_uv_width; j++, k++) {
          int m = k + 256;

          // U
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;
          dst1[byte] = (uint8_t)pval;

          // V
          pval = accumulator[m] + (count[m] >> 1);
          pval *= fixed_divide[count[m]];
          pval >>= 19;
          dst2[byte] = (uint8_t)pval;

          // move to next pixel
          byte++;
        }
        byte += stride - mb_uv_width;
      }
    }
#else
    // Normalize filter output to produce AltRef frame
    dst1 = cpi->alt_ref_buffer.y_buffer;
    stride = cpi->alt_ref_buffer.y_stride;
    byte = mb_y_offset;
    for (i = 0, k = 0; i < 16; i++) {
      for (j = 0; j < 16; j++, k++) {
        unsigned int pval = accumulator[k] + (count[k] >> 1);
        pval *= fixed_divide[count[k]];
        pval >>= 19;

        dst1[byte] = (uint8_t)pval;

        // move to next pixel
        byte++;
      }
      byte += stride - 16;
    }

    dst1 = cpi->alt_ref_buffer.u_buffer;
    dst2 = cpi->alt_ref_buffer.v_buffer;
    stride = cpi->alt_ref_buffer.uv_stride;
    byte = mb_uv_offset;
    for (i = 0, k = 256; i < mb_uv_height; i++) {
      for (j = 0; j < mb_uv_width; j++, k++) {
        int m = k + 256;

        // U
        unsigned int pval = accumulator[k] + (count[k] >> 1);
        pval *= fixed_divide[count[k]];
        pval >>= 19;
        dst1[byte] = (uint8_t)pval;

        // V
        pval = accumulator[m] + (count[m] >> 1);
        pval *= fixed_divide[count[m]];
        pval >>= 19;
        dst2[byte] = (uint8_t)pval;

        // move to next pixel
        byte++;
      }
      byte += stride - mb_uv_width;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    mb_y_offset += 16;
    mb_uv_offset += mb_uv_width;
  }
}

// Runs the temporal filter over every macroblock row owned by tile
// (tile_row, tile_col), converting the tile's mi-unit bounds into
// 16x16-macroblock units (one MB spans two mi units; end bounds round up).
static void temporal_filter_iterate_tile_c(VP9_COMP *cpi, int tile_row,
                                           int tile_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_idx = tile_row * tile_cols + tile_col;
  TileInfo *tile_info = &cpi->tile_data[tile_idx].tile_info;
  const int mb_row_begin = tile_info->mi_row_start >> 1;
  const int mb_row_stop = (tile_info->mi_row_end + 1) >> 1;
  const int mb_col_begin = tile_info->mi_col_start >> 1;
  const int mb_col_stop = (tile_info->mi_col_end + 1) >> 1;
  int row;

  for (row = mb_row_begin; row < mb_row_stop; row++) {
    vp9_temporal_filter_iterate_row_c(cpi, &cpi->td, row, mb_col_begin,
                                      mb_col_stop);
  }
}

// Single-threaded driver: filters every tile in the frame, preserving the
// encoder's prediction-source pointers around the pass (the per-tile
// filtering repoints them).
static void temporal_filter_iterate_c(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int n_tile_cols = 1 << cm->log2_tile_cols;
  const int n_tile_rows = 1 << cm->log2_tile_rows;
  MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
  // Save input state
  uint8_t *saved_pre_buf[MAX_MB_PLANE];
  int plane, trow, tcol;

  for (plane = 0; plane < MAX_MB_PLANE; plane++)
    saved_pre_buf[plane] = mbd->plane[plane].pre[0].buf;

  vp9_init_tile_data(cpi);

  for (trow = 0; trow < n_tile_rows; ++trow) {
    for (tcol = 0; tcol < n_tile_cols; ++tcol) {
      temporal_filter_iterate_tile_c(cpi, trow, tcol);
    }
  }

  // Restore input state
  for (plane = 0; plane < MAX_MB_PLANE; plane++)
    mbd->plane[plane].pre[0].buf = saved_pre_buf[plane];
}

642
// Apply buffer limits and context specific adjustments to arnr filter.
clang-format's avatar
clang-format committed
643
// Apply buffer limits and context specific adjustments to arnr filter:
// derives the final number of frames and filter strength for the ARNR
// group around |distance| and returns them through the out parameters.
static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
                               int *arnr_frames, int *arnr_strength) {
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  const int frames_after_arf =
      vp9_lookahead_depth(cpi->lookahead) - distance - 1;
  int fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
  int bwd;
  int q, frames, base_strength, strength;

  // In two-pass encoding fold in the firstpass strength adjustment,
  // clipped to the legal [0, 6] range.
  if (oxcf->pass == 2) {
    base_strength = oxcf->arnr_strength + cpi->twopass.arnr_strength_adjustment;
    base_strength = VPXMIN(6, VPXMAX(0, base_strength));
  } else {
    base_strength = oxcf->arnr_strength;
  }

  // Forward frames are limited by what remains in the lookahead and by
  // the distance to the ARF.
  fwd = VPXMIN(fwd, frames_after_arf);
  fwd = VPXMIN(fwd, distance);

  bwd = fwd;

  // For even length filter there is one more frame backward
  // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
  if (bwd < distance) bwd += (oxcf->arnr_max_frames + 1) & 0x1;

  // Baseline active filter size (backward + ARF + forward).
  frames = bwd + 1 + fwd;

  // Reduce strength when the active Q is very low to avoid over-smoothing.
  q = (int)vp9_convert_qindex_to_q(
      cpi->rc.avg_frame_qindex[cpi->common.current_video_frame > 1 ? INTER_FRAME
                                                                   : KEY_FRAME],
      cpi->common.bit_depth);
  strength = base_strength;
  if (q <= 16) {
    strength -= (16 - q) / 2;
    if (strength < 0) strength = 0;
  }

  // Cap the frame count and strength by the gf group boost level.
  if (frames > group_boost / 150) {
    frames = group_boost / 150;
    frames += !(frames & 1);  // keep the count odd
  }
  if (strength > group_boost / 300) {
    strength = group_boost / 300;
  }

  // Second-level arfs in a multi-arf group get half strength.
  if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
      strength >>= 1;
    }
  }

  *arnr_frames = frames;
  *arnr_strength = strength;
}

// Produce the filtered alt-ref frame by running the ARNR temporal filter
// over a window of lookahead frames centred |distance| entries into the
// lookahead queue.  Filter length and strength come from the rate-control
// context; this function then gathers the frame pointers, sets up scale
// factors, initialises the RD constants used by the motion search, and
// dispatches to the single- or multi-threaded filter core.
void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
  VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data;
  struct scale_factors *sf = &arnr_filter_data->sf;
  YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames;
  int strength;
  int frames_to_blur;
  int blur_bwd;  // frames filtered before the alt-ref position
  int blur_fwd;  // frames filtered after the alt-ref position
  int start_frame;
  int i;
  int rd_mult;

  // Apply context specific adjustments to the arnr filter parameters.
  adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
  blur_bwd = frames_to_blur / 2;
  blur_fwd = (frames_to_blur - 1) / 2;
  start_frame = distance + blur_fwd;

  arnr_filter_data->strength = strength;
  arnr_filter_data->frame_count = frames_to_blur;
  arnr_filter_data->alt_ref_index = blur_bwd;

  // Setup frame pointers, NULL indicates frame not included in filter.
  for (i = 0; i < frames_to_blur; ++i) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, start_frame - i);
    frames[frames_to_blur - 1 - i] = &buf->img;
  }

  if (frames_to_blur > 0) {
    // Setup scaling factors. Scaling on each of the arnr frames is not
    // supported.
    if (cpi->use_svc) {
      // In spatial svc the scaling factors might be less then 1/2.
      // So we will use non-normative scaling.
      int num_scaled = 0;
#if CONFIG_VP9_HIGHBITDEPTH
      vp9_setup_scale_factors_for_frame(
          sf, get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height,
          get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height, cm->use_highbitdepth);
#else
      vp9_setup_scale_factors_for_frame(
          sf, get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height,
          get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      // Any frame whose dimensions do not match the coded size is rescaled
      // into a scratch buffer before filtering.
      for (i = 0; i < frames_to_blur; ++i) {
        if (cm->mi_cols * MI_SIZE != frames[i]->y_width ||
            cm->mi_rows * MI_SIZE != frames[i]->y_height) {
          if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[num_scaled],
                                       cm->width, cm->height, cm->subsampling_x,
                                       cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                       cm->use_highbitdepth,
#endif
                                       VP9_ENC_BORDER_IN_PIXELS,
                                       cm->byte_alignment, NULL, NULL, NULL)) {
            vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                               "Failed to reallocate alt_ref_buffer");
          }
          frames[i] = vp9_scale_if_required(
              cm, frames[i], &cpi->svc.scaled_frames[num_scaled], 0, EIGHTTAP,
              0);
          ++num_scaled;
        }
      }
      // Re-anchor the mode-info pointers after the buffer reallocation.
      cm->mi = cm->mip + cm->mi_stride + 1;
      xd->mi = cm->mi_grid_visible;
      xd->mi[0] = cm->mi;
    } else {
// ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
      vp9_setup_scale_factors_for_frame(
          sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
          frames[0]->y_crop_width, frames[0]->y_crop_height,
          cm->use_highbitdepth);
#else
      vp9_setup_scale_factors_for_frame(
          sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
          frames[0]->y_crop_width, frames[0]->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    }
  }

  // Initialize errorperbit and sabperbit.
  rd_mult = (int)vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
  if (rd_mult < 1) rd_mult = 1;
  set_error_per_bit(&cpi->td.mb, rd_mult);
  vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);

  // Run the filter core, multi-threaded per row when row_mt is enabled.
  if (cpi->row_mt)
    vp9_temporal_filter_row_mt(cpi);
  else
    temporal_filter_iterate_c(cpi);
}